mwmitchell-rsolr 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/CHANGES.txt +41 -0
  2. data/LICENSE +201 -0
  3. data/README.rdoc +191 -0
  4. data/Rakefile +40 -0
  5. data/examples/direct.rb +20 -0
  6. data/examples/http.rb +16 -0
  7. data/lib/core_ext.rb +8 -0
  8. data/lib/rsolr.rb +34 -0
  9. data/lib/rsolr/connection.rb +7 -0
  10. data/lib/rsolr/connection/adapter.rb +7 -0
  11. data/lib/rsolr/connection/adapter/common_methods.rb +46 -0
  12. data/lib/rsolr/connection/adapter/direct.rb +80 -0
  13. data/lib/rsolr/connection/adapter/http.rb +51 -0
  14. data/lib/rsolr/connection/base.rb +121 -0
  15. data/lib/rsolr/connection/search_ext.rb +126 -0
  16. data/lib/rsolr/http_client.rb +115 -0
  17. data/lib/rsolr/http_client/adapter.rb +6 -0
  18. data/lib/rsolr/http_client/adapter/curb.rb +51 -0
  19. data/lib/rsolr/http_client/adapter/net_http.rb +48 -0
  20. data/lib/rsolr/indexer.rb +23 -0
  21. data/lib/rsolr/mapper.rb +62 -0
  22. data/lib/rsolr/mapper/rss.rb +29 -0
  23. data/lib/rsolr/message.rb +73 -0
  24. data/lib/rsolr/response.rb +8 -0
  25. data/lib/rsolr/response/base.rb +33 -0
  26. data/lib/rsolr/response/index_info.rb +22 -0
  27. data/lib/rsolr/response/query.rb +170 -0
  28. data/lib/rsolr/response/update.rb +4 -0
  29. data/test/connection/direct_test.rb +22 -0
  30. data/test/connection/http_test.rb +19 -0
  31. data/test/connection/search_ext_test_methods.rb +17 -0
  32. data/test/connection/test_methods.rb +122 -0
  33. data/test/http_client/curb_test.rb +19 -0
  34. data/test/http_client/net_http_test.rb +13 -0
  35. data/test/http_client/test_methods.rb +40 -0
  36. data/test/http_client/util_test.rb +40 -0
  37. data/test/mapper_test.rb +123 -0
  38. data/test/message_test.rb +87 -0
  39. data/test/pagination_test.rb +58 -0
  40. data/test/ruby-lang.org.rss.xml +391 -0
  41. data/test/test_helpers.rb +39 -0
  42. metadata +107 -0
@@ -0,0 +1,115 @@
1
+ #require 'uri'
2
+
3
+ # A simple wrapper for different http client implementations.
4
+ # Supports #get and #post
5
+ # This was motivated by: http://apocryph.org/2008/11/09/more_indepth_analysis_ruby_http_client_performance/
6
+ # Net::HTTP is the default adapter
7
+
8
+ # Each adapter response should be a hash with the following keys:
9
+ # :status_code
10
+ # :url
11
+ # :body
12
+ # :path
13
+ # :params
14
+ # :data
15
+ # :headers
16
+
17
+ # Example:
18
+ # hclient = RSolr::HTTPClient.connect('http://www.google.com', :net_http)
19
+ # response = hclient.get('/search', :hl=>:en, :q=>:ruby, :btnG=>:Search)
20
+ # puts response[:status_code]
21
+ # puts response[:body]
22
+
23
+ module RSolr::HTTPClient
24
+
25
+ autoload :Adapter, 'rsolr/http_client/adapter'
26
+
27
# Raised by .connect when the requested adapter name is not recognised.
class UnkownAdapterError < RuntimeError; end

# Builds a Base client around the adapter selected by +adapter_name+.
#
# url          - handed unchanged to the adapter's constructor
# adapter_name - :net_http (the default) or :curb
#
# Raises UnkownAdapterError for any other adapter name.
def self.connect(url, adapter_name=:net_http)
  adapter_const =
    if :curb == adapter_name
      'Curb'
    elsif :net_http == adapter_name
      'NetHTTP'
    else
      raise UnkownAdapterError.new("Name: #{adapter_name}")
    end
  # Resolving the constant by name lets the Adapter module's autoload
  # pull in only the adapter that was actually requested.
  adapter_class = RSolr::HTTPClient::Adapter.const_get(adapter_const)
  Base.new(adapter_class.new(url))
end
40
+
41
# Thin facade over an HTTP adapter: forwards #get and #post to the adapter
# and re-raises any StandardError the adapter raises as RSolr::RequestError.
class Base

  attr_reader :adapter

  # adapter - object responding to #get(path, params) and
  #           #post(path, data, params, headers)
  def initialize(adapter)
    @adapter = adapter
  end

  # Returns whatever the adapter's #get returns.
  def get(path, params={})
    @adapter.get(path, params)
  rescue
    raise RSolr::RequestError.new($!)
  end

  # Returns whatever the adapter's #post returns.
  def post(path, data, params={}, headers={})
    @adapter.post(path, data, params, headers)
  rescue
    raise RSolr::RequestError.new($!)
  end

end
68
+
69
# URL-building helpers mixed into the HTTP adapters.
module Util

  # Escapes a query key/value for use in a URL.
  #
  # Letters, digits, "_", "." and "-" are left alone, a space becomes "+",
  # and every other byte is written as an upper-case %XX escape.
  def escape(s)
    s.to_s.gsub(/[^ a-zA-Z0-9_.-]+/) { |unsafe|
      # Escape per byte, not per character, so multibyte characters
      # (e.g. UTF-8) produce one %XX for each of their bytes.
      unsafe.unpack('C*').collect { |byte| '%%%02X' % byte }.join
    }.tr(' ', '+')
  end

  # Appends a query string to +url+.
  #
  # url          - base url or path
  # params       - hash of query params (see #hash_to_params)
  # string_query - an already-encoded query string, placed before +params+
  #
  # Empty parts are dropped; +url+ is returned untouched when there is
  # nothing to append.
  def build_url(url='', params={}, string_query='')
    queries = [string_query, hash_to_params(params)]
    queries.delete_if { |part| part.to_s.empty? }
    url += "?#{queries.join('&')}" unless queries.empty?
    url
  end

  # Returns a single escaped "key=value" pair.
  def build_param(k,v)
    "#{escape(k)}=#{escape(v)}"
  end

  #
  # converts hash into URL query string, keys get an alpha sort
  # if a value is an array, the array values get mapped to the same key:
  #   hash_to_params(:q=>'blah', 'facet.field'=>['location_facet', 'format_facet'])
  # returns:
  #   facet.field=location_facet&facet.field=format_facet&q=blah
  #
  # if a value is empty/nil etc., the key is not added
  # returns nil when +params+ is not a Hash
  def hash_to_params(params)
    return unless params.is_a?(Hash)
    # copy params and convert keys to strings
    stringified = params.inject({}) { |acc, (k, v)| acc.merge(k.to_s => v) }
    # walk the keys in sorted order
    stringified.keys.sort.inject([]) do |pairs, key|
      value = stringified[key]
      if value.is_a?(Array)
        # concat (not <<) keeps the list flat, so an array that is empty
        # after filtering contributes nothing instead of a stray "&"
        present = value.reject { |item| item.to_s.empty? }
        pairs.concat(present.collect { |item| build_param(key, item) })
      elsif !value.to_s.empty?
        pairs.push(build_param(key, value))
      end
      pairs
    end.join('&')
  end

end
114
+
115
+ end
@@ -0,0 +1,6 @@
1
# Namespace for the concrete HTTP adapters. Each adapter is registered for
# autoload, so its file is only required when the constant is first used.
module RSolr::HTTPClient::Adapter

  [
    [:Curb,    'rsolr/http_client/adapter/curb'],
    [:NetHTTP, 'rsolr/http_client/adapter/net_http']
  ].each do |const_name, feature|
    autoload const_name, feature
  end

end
@@ -0,0 +1,51 @@
1
+ require 'rubygems'
2
+ require 'curb'
3
+
4
# HTTP adapter backed by a single, reused Curl::Easy handle.
#
# #get and #post return a hash with the keys :status_code, :url, :body,
# :path, :params, :data and :headers.
class RSolr::HTTPClient::Adapter::Curb

  include RSolr::HTTPClient::Util

  attr_reader :uri, :c

  # url - base url; it is parsed once and stored as #uri
  def initialize(url)
    @uri = URI.parse(url)
    @c = ::Curl::Easy.new
  end

  # Performs a GET against +path+ (relative to the base url).
  def get(path, params={})
    @c.url = _build_url(path, params)
    @c.multipart_form_post = false
    @c.perform
    create_http_context(path, params)
  end

  # POSTs +data+ to +path+ with the given request +headers+.
  def post(path, data, params={}, headers={})
    @c.url = _build_url(path, params)
    @c.headers = headers
    @c.http_post(data)
    create_http_context(path, params, data, headers)
  end

  protected

  # Packages the state of the curl handle after a request, together with
  # the request arguments, into the response hash.
  def create_http_context(path, params, data=nil, headers={})
    {
      :status_code => @c.response_code.to_i,
      :url         => @c.url,
      :body        => @c.body_str,
      :path        => path,
      :params      => params,
      :data        => data,
      :headers     => headers
    }
  end

  # Builds the absolute request url: scheme, host, optional port, the base
  # path plus +path+, then the base url's own query followed by +params+.
  def _build_url(path, params={})
    location = @uri.scheme + '://' + @uri.host
    location << ":#{@uri.port}" if @uri.port
    build_url(location + @uri.path + path, params, @uri.query)
  end

end
@@ -0,0 +1,48 @@
1
+ require 'net/http'
2
+
3
# HTTP adapter backed by Net::HTTP.
#
# #get and #post return a hash with the keys :status_code, :body, :url,
# :path, :params, :data and :headers.
class RSolr::HTTPClient::Adapter::NetHTTP

  include RSolr::HTTPClient::Util

  attr_reader :uri, :c

  # url - base url; its host and port are used for the Net::HTTP connection
  def initialize(url)
    @uri = URI.parse(url)
    @c = Net::HTTP.new(@uri.host, @uri.port)
  end

  # Performs a GET against +path+ (relative to the base url's path).
  def get(path, params={})
    request_url = _build_url(path, params)
    create_http_context(@c.get(request_url), request_url, path, params)
  end

  # POSTs +data+ to +path+ with the given request +headers+.
  def post(path, data, params={}, headers={})
    request_url = _build_url(path, params)
    reply = @c.post(request_url, data, headers)
    create_http_context(reply, request_url, path, params, data, headers)
  end

  protected

  # Packages the Net::HTTP response and the request arguments into the
  # response hash. +url+ is the path-plus-query that was requested; the
  # scheme, host and port are prepended to form the reported :url.
  def create_http_context(net_http_response, url, path, params, data=nil, headers={})
    port_part = @uri.port ? ":#{@uri.port}" : ''
    absolute_url = "#{@uri.scheme}://#{@uri.host}#{port_part}#{url}"
    {
      :status_code => net_http_response.code.to_i,
      :body        => net_http_response.body,
      :url         => absolute_url,
      :path        => path,
      :params      => params,
      :data        => data,
      :headers     => headers
    }
  end

  # Builds the request path and query: the base path plus +path+, then the
  # base url's own query followed by +params+.
  def _build_url(path, params={})
    build_url(@uri.path + path, params, @uri.query)
  end

end
@@ -0,0 +1,23 @@
1
# Runs raw records through a mapper and sends the mapped documents to a
# solr connection.
class RSolr::Indexer

  attr_reader :solr, :mapper, :opts

  # solr              - object responding to #add(docs, params)
  # mapping_or_mapper - a mapping Hash (wrapped in RSolr::Mapper::Base) or a
  #                     ready-made mapper, which is used as-is
  # opts              - stored and exposed through #opts
  def initialize(solr, mapping_or_mapper, opts={})
    @solr = solr
    @mapper =
      if mapping_or_mapper.is_a?(Hash)
        RSolr::Mapper::Base.new(mapping_or_mapper)
      else
        mapping_or_mapper
      end
    @opts = opts
  end

  # data   - the raw data to send into the mapper, one item at a time
  # params - url query params for solr /update handler
  # block can be used for modifying the "add", "doc" and "field" xml elements (for boosting etc.)
  def index(data, params={}, &block)
    mapped_docs = data.collect { |record| @mapper.map(record) }
    # A block is always handed to #add; it only forwards when the caller
    # supplied one.
    @solr.add(mapped_docs, params) do |add, doc, field|
      # check opts for :debug etc.?
      block.call(add, doc, field) if block
    end
  end

end
@@ -0,0 +1,62 @@
1
+ module RSolr::Mapper
2
+
3
+ autoload :RSS, 'rsolr/mapper/rss'
4
+
5
+ class UnkownMappingValue < RuntimeError; end
6
+
7
# Maps source records onto hashes according to a mapping table.
#
# Each mapping entry is +target_key => rule+, where the rule is one of:
#   String     - used as a literal value
#   Symbol     - looked up in the source record (see #source_field_value)
#   Proc       - called with (source, index); its result is the value
#   Enumerable - each element is looked up in the source record and the
#                results are flattened into one array
#   other      - converted with #to_s
class Base

  attr_reader :mapping, :opts

  # mapping - the mapping table (yielded to the block, if given, so it can
  #           be filled in place)
  # opts    - stored and exposed through #opts
  def initialize(mapping={}, opts={}, &block)
    @mapping = mapping
    @opts = opts
    yield @mapping if block_given?
  end

  # source - a hash or array of source data
  # override_mapping - an alternate mapping, used instead of #mapping
  # returns an array with one or more mapped hashes
  #
  # Keys whose mapped value is blank (nil, empty string, or an array with
  # no non-blank content) are left out of the mapped hash. Each mapped
  # hash is yielded to the block, if given, before it is collected.
  def map(source, override_mapping=nil, &block)
    source = [source] if source.is_a?(Hash)
    mapping = override_mapping || @mapping
    index = -1
    # collect a bunch of hashes...
    source.collect do |src|
      index += 1
      # for each mapping item, inject data into a new hash
      final_hash = mapping.inject({}) do |doc, (map_key, map_value)|
        value = mapped_field_value(src, map_value, index)
        doc[map_key] = value unless blank_value?(value)
        doc
      end
      yield final_hash if block_given?
      final_hash
    end
  end

  protected

  # True when +value+ carries no content. Arrays are joined rather than
  # converted with #to_s: on Ruby 1.9+ Array#to_s is #inspect, so [] and
  # [nil, nil] would otherwise count as non-empty.
  def blank_value?(value)
    value.is_a?(Array) ? value.join.empty? : value.to_s.empty?
  end

  # This is a hook method useful for subclassing
  def source_field_value(source, field_name, index)
    source[field_name]
  end

  # Resolves one mapping rule against one source record.
  def mapped_field_value(source, mapped_value, index)
    case mapped_value
    when String
      mapped_value
    when Symbol
      source_field_value(source, mapped_value, index)
    when Proc
      mapped_value.call(source, index)
    when Enumerable
      mapped_value.collect {|key| source_field_value(source, key, index)}.flatten
    else
      # try to turn it into a string, else raise UnkownMappingValue
      mapped_value.respond_to?(:to_s) ? mapped_value.to_s : raise(UnkownMappingValue.new(mapped_value))
    end
  end

end
61
+
62
+ end
@@ -0,0 +1,29 @@
1
+ #TODO - this could use the http wrapper stuff instead of open-uri/net::http
2
+
3
+ require 'rss'
4
+ require 'open-uri'
5
+
6
# Mapper that reads its source records from the items of an RSS feed.
class RSolr::Mapper::RSS < RSolr::Mapper::Base

  attr_reader :rss

  # rss_file_or_url is file path or url (see open-uri)
  # override_mapping is an alternate mapping (see RSolr::Mapper::Base)
  # returns array of mapped hashes
  #
  # NOTE(review): this relies on open-uri making Kernel#open accept urls;
  # newer Rubies appear to require URI.open for that -- confirm against the
  # Ruby versions this must support before changing it.
  def map(rss_file_or_url, override_mapping=nil)
    open(rss_file_or_url) do |io|
      @rss = RSS::Parser.parse(io.read, false)
      super(rss.items.collect, override_mapping)
    end
  end

  # sends methods chain down into the @rss object
  # example: :'channel.title' == @rss.channel.title
  # if a link in the chain isn't supported by the current object, the
  # super #source_field_value result is used for that step instead
  def source_field_value(source, method_path, index)
    current = @rss
    method_path.to_s.split('.').each do |method_name|
      current =
        if current.respond_to?(method_name)
          current.send(method_name.to_sym)
        else
          super(source, method_path, index)
        end
    end
    current
  end

end
@@ -0,0 +1,73 @@
1
+
2
+ # http://builder.rubyforge.org/
3
+ require 'rubygems'
4
+ require 'builder'
5
+
6
+ # The RSolr::Message class generates the XML messages used to send updates to Solr.
7
+
8
# Builds the XML update messages (add, commit, optimize, rollback, delete).
# Every method is a class method and starts from a fresh builder.
class RSolr::Message

  # Returns a new, empty Builder::XmlMarkup instance.
  def self.xml
    Builder::XmlMarkup.new
  end

  # Builds an <add> message holding one <doc> per item.
  #
  #   add({})
  #   add([{}, {}])
  #
  # data - a single hash-like document or a list of them
  # opts - attributes for the <add> element
  #
  # Field names are stringified and emitted in sorted order; a value that
  # is an array produces one <field> per (flattened) element.
  # The block, if given, is called once per field name with the document
  # and that field's attribute hash, and may modify the attributes:
  #   add(docs) do |doc, field_attrs|
  #     field_attrs[:boost] = 10.0
  #   end
  def self.add(data, opts={}, &block)
    data = [data] if data.respond_to?(:each_pair) # if it's a hash, put it in an array
    xml.add(opts) do |add_xml|
      data.each do |item|
        add_xml.doc do |doc_xml|
          # convert keys into strings and perform an alpha sort (easier testing between ruby and jruby)
          fields = item.inject({}) do |acc, (name, value)|
            acc.merge({name.to_s => value})
          end
          fields.keys.sort.each do |name|
            field_attrs = {:name => name}
            yield item, field_attrs if block_given?
            [fields[name]].flatten.each do |value| # multiValued attributes
              doc_xml.field(value, field_attrs)
            end
          end
        end
      end
    end
  end

  # Builds a <commit> message; opts become its attributes.
  def self.commit(opts={})
    xml.commit(opts)
  end

  # Builds an <optimize> message; opts become its attributes.
  def self.optimize(opts={})
    xml.optimize(opts)
  end

  # Builds a <rollback> message.
  def self.rollback
    xml.rollback
  end

  # Builds a <delete> message with one <id> per id.
  # ids - a single id or an array of ids
  def self.delete_by_id(ids)
    ids = [ids] unless ids.is_a?(Array)
    xml.delete do |delete_xml|
      ids.each { |id| delete_xml.id(id) }
    end
  end

  # Builds a <delete> message with one <query> per query.
  # queries - a single query or an array of queries
  def self.delete_by_query(queries)
    queries = [queries] unless queries.is_a?(Array)
    xml.delete do |delete_xml|
      queries.each { |query| delete_xml.query(query) }
    end
  end

end
@@ -0,0 +1,8 @@
1
# Namespace for the response wrappers. Each class is registered for
# autoload, so its file is only required when the constant is first used.
module RSolr::Response

  [
    [:Base,      'rsolr/response/base'],
    [:Query,     'rsolr/response/query'],
    [:IndexInfo, 'rsolr/response/index_info'],
    [:Update,    'rsolr/response/update']
  ].each do |const_name, feature|
    autoload const_name, feature
  end

end
@@ -0,0 +1,33 @@
1
+ # default/base response object
2
+ # This is where the ruby "eval" happens
3
+ # So far, all response classes extend this
4
# Default/base response object; turns a raw response into a data hash and
# exposes the fields of its 'responseHeader'.
class RSolr::Response::Base

  attr_reader :source

  attr_reader :raw_response, :data, :header, :params, :status, :query_time

  # data - one of:
  #   * a Hash with a :body key (an adapter response): the body is
  #     evaluated and the hash itself is kept as #source
  #   * a String: evaluated as ruby
  #   * any other Hash: used directly as the response data
  #
  # SECURITY NOTE(review): the response body is passed to Kernel.eval, so a
  # body from an untrusted or compromised server would execute as ruby code
  # in this process.
  def initialize(data)
    if data.is_a?(Hash) && data.has_key?(:body)
      @raw_response = data[:body]
      @data = Kernel.eval(@raw_response)
      @source = data
    elsif data.is_a?(String)
      @raw_response = data
      @data = Kernel.eval(@raw_response)
    elsif data.is_a?(Hash)
      @data = data
    end
    @header = @data['responseHeader']
    @params, @status, @query_time =
      @header['params'], @header['status'], @header['QTime']
  end

  # True when the response header's status is 0.
  def ok?
    status == 0
  end

end
33
+ end