delsolr 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Manifest.txt CHANGED
@@ -8,4 +8,6 @@ lib/delsolr/configuration.rb
8
8
  lib/delsolr/extensions.rb
9
9
  lib/delsolr/query_builder.rb
10
10
  lib/delsolr/response.rb
11
+ lib/delsolr/document.rb
11
12
  lib/delsolr/version.rb
13
+
data/README.txt CHANGED
@@ -15,6 +15,30 @@ while keeping the interface as ruby-esque as possible.
15
15
 
16
16
  See http://delsolr.rubyforge.org for more info
17
17
 
18
+ Example:
19
+
20
+ c = DelSolr::Client.new(:server => 'solr1', :port => 8983)
21
+ rsp = c.query('dismax', :query => 'mp3 player',
22
+ :filters => {:cost => (50..100)},
23
+ :facets => [{:field => 'brand', :limit => 10},
24
+ {:query => {:onsale => true, :brand => 'Apple'},
25
+ :name => 'cheap_apple'}])
26
+
27
+ # output total matches
28
+ puts rsp.total
29
+
30
+ # output each id with score
31
+ rsp.docs.each { |doc| puts "#{doc[:id]} - #{doc[:score]}" }
32
+
33
+ # output each value for a facet
34
+ rsp.facet_field_values('brand').each do |brand|
35
+ puts "#{brand}: #{rsp.facet_field_count('brand', brand)}"
36
+ end
37
+
38
+ # output a query facet
39
+ puts "Cheap Apple stuff: #{rsp.facet_query_count_by_name('cheap_apple')}"
40
+
41
+
18
42
  == REQUIREMENTS:
19
43
 
20
44
  You need Solr installed somewhere so you can query it ;)
@@ -23,11 +47,22 @@ You need Solr installed somewhere so you can query it ;)
23
47
 
24
48
  sudo gem install delsolr
25
49
 
50
+ == TODO:
51
+
52
+ * finish unit tests (use mocha to stub out Net::HTTP)
53
+ * implement delete_by_query
54
+ * make thread safe
55
+ * it would be nice to be able to have things like commit/optimize be run in threads on timers periodically
56
+ * right now a few things need to be locked
57
+ * connection
58
+ * pending_documents array
59
+
60
+
26
61
  == LICENSE:
27
62
 
28
63
  (The MIT License)
29
64
 
30
- Copyright (c) 2008 FIXME full name
65
+ Copyright (c) 2008 Avvo, INC - http://www.avvo.com
31
66
 
32
67
  Permission is hereby granted, free of charge, to any person obtaining
33
68
  a copy of this software and associated documentation files (the
@@ -1,11 +1,12 @@
1
1
  module DelSolr
2
2
  class Client
3
3
  class Configuration
4
- attr_accessor :server, :port
4
+ attr_accessor :server, :port, :timeout
5
5
 
6
- def initialize(server, port)
6
+ def initialize(server, port, timeout = 120)
7
7
  @server = server
8
8
  @port = port.to_i
9
+ @timeout = timeout || 120
9
10
  end
10
11
 
11
12
  end
@@ -0,0 +1,82 @@
1
+ module DelSolr
2
+ #
3
+ # DelSolr::Client::Document
4
+ #
5
+ # Generally, indexing consists of iterating over your database, creating a huge xml buffer, and
6
+ # posting it to solr. This wraps up the xml portion and DelSolr::Client wraps up the posting/batching
7
+ # portions.
8
+ #
9
+ # This is what your indexing logic might look like if you're dealing w/ ActiveRecord objects:
10
+ #
11
+ # client = DelSolr::Client.new(:server => 'solr1', :port => 8983, :timeout => 500)
12
+ # models = SomeModel.find(:all)
13
+ # models.each do |model|
14
+ # doc = DelSolr::Document.new
15
+ # doc.add_field('id', model.id)
16
+ # doc.add_field('name', model.name)
17
+ # model.tags.each do |tag| # multiple tag fields
18
+ # doc.add_field('tag', tag.name)
19
+ # end
20
+ # client.update(doc) # batch the document update
21
+ # end
22
+ # client.post_update! # send the batch to solr
23
+ # client.commit! # send the commit so solr updates the index
24
+ #
25
+ # It's generally a good idea to experiment with different batch size. 500-2000 documents per post
26
+ # is a good starting point depending on how large your documents are.
27
+ #
28
+ # You may also want to update just a single document when it changes. That might look like this:
29
+ #
30
+ # def after_save
31
+ # doc = DelSolr::Document.new
32
+ # doc.add_field('id', model.id)
33
+ # doc.add_field('name', model.name)
34
+ # $client.update_and_commit!(doc) # post the document and immediately post the commit
35
+ # end
36
+ #
37
+ #
38
+ class Document
39
+
40
+ # [<b><tt>field_name</tt></b>]
41
+ # is the name of the field in your schema.xml
42
+ # [<b><tt>value</tt></b>]
43
+ # is the value of the field you wish to be indexed
44
+ # [<b><tt>options</tt></b>]
45
+ # <b><tt>:cdata</tt></b> set to true if you want the value wrapped in a CDATA section
46
+ #
47
+ # All other options are passed directly as xml attributes (see the solr documentation on usage)
48
+ def add_field(field_name, value, options = {})
49
+ field_buffer << construct_field_tag(field_name, value, options)
50
+ end
51
+
52
+ def xml
53
+ "<doc>\n" + field_buffer + "</doc>"
54
+ end
55
+
56
+ private
57
+
58
+ # creates xml field for given inputs
59
+ def construct_field_tag(name, value, options={})
60
+ options[:name] = name.to_s
61
+ use_cdata = options.delete(:cdata)
62
+ opts = []
63
+ options.each do |k,v|
64
+ opts.push "#{k}=\"#{v}\""
65
+ end
66
+ opts = opts.join(" ")
67
+ opts = " " + opts if opts
68
+
69
+ return "<field#{opts}>#{use_cdata ? cdata(value) : value}</field>\n"
70
+ end
71
+
72
+ def cdata(str)
73
+ "<![CDATA[#{str}]]>"
74
+ end
75
+
76
+ def field_buffer
77
+ @buffer ||= ""
78
+ end
79
+
80
+ end
81
+
82
+ end
@@ -6,6 +6,8 @@ module DelSolr
6
6
 
7
7
  class QueryBuilder
8
8
 
9
+ FL_DEFAULTS = 'id,unique_id,score' # redefine if you really want to change this.
10
+
9
11
  attr_accessor :query_name, :options
10
12
 
11
13
  # ops can basically be straight solr URL params, but it also supports some other formats
@@ -19,19 +21,19 @@ module DelSolr
19
21
  def request_string
20
22
  @request_string ||= build_request_string
21
23
  end
22
-
24
+
23
25
  # returns the query string of the facet query for the given query name (used for resolving counts for given queries)
24
26
  def facet_query_by_name(query_name)
25
27
  name_to_facet_query[query_name]
26
28
  end
27
29
 
28
30
  private
29
-
31
+
30
32
  def build_request_string()
31
33
  raise "query_name must be set" if query_name.blank?
32
-
34
+
33
35
  opts = self.options.dup
34
-
36
+
35
37
  # cleanup the nils
36
38
  opts.delete_if {|k,v| v.nil?}
37
39
 
@@ -39,7 +41,7 @@ module DelSolr
39
41
  opts[:q] ||= opts[:query]
40
42
  opts[:rows] ||= opts[:limit] || 10
41
43
  opts[:start] ||= opts[:offset] || 0
42
- opts[:fl] ||= opts[:fields] || 'id,unique_id,index_type,score'
44
+ opts[:fl] ||= opts[:fields] || FL_DEFAULTS
43
45
  opts[:bq] ||= opts[:boost]
44
46
  opts[:suggestionCount] ||= opts[:suggestion_count]
45
47
  opts[:onlyMorePopular] ||= opts[:only_more_popular]
@@ -4,7 +4,7 @@ module DelSolr
4
4
 
5
5
  class Response
6
6
 
7
- attr_reader :raw_response, :query_builder
7
+ attr_reader :query_builder
8
8
 
9
9
  def initialize(solr_response_buffer, query_builder, options = {})
10
10
  @query_builder = query_builder
@@ -26,91 +26,86 @@ module DelSolr
26
26
  end
27
27
  end
28
28
 
29
- # returns the total number of matches
29
+ # Returns the "raw" ruby hash that is returned by the solr ruby response writer. This is mostly for debugging purposes.
30
+ def raw_response
31
+ @raw_response
32
+ end
33
+
34
+ # Returns the total number of matches
30
35
  def total
31
36
  @total ||= raw_response['response']['numFound']
32
37
  end
33
38
 
39
+ # Returns true if there are no results
34
40
  def blank?
35
- total < 1
41
+ total.zero?
36
42
  end
37
43
 
38
44
  alias_method :empty?, :blank?
39
45
 
46
+ # Returns true if this response was pulled from the cache
40
47
  def from_cache?
41
48
  @from_cache
42
49
  end
43
50
 
44
- # returns the offset
51
+ # Returns the offset that was given in the request
45
52
  def offset
46
53
  @offset ||= raw_response['response']['start']
47
54
  end
48
55
 
49
- # returns the max score
56
+ # Returns the max score of the result set
50
57
  def max_score
51
58
  @max_score ||= raw_response['response']['maxScore'].to_f
52
59
  end
53
60
 
54
- # returns an array of all ids for the given search
55
- def ids
56
- @ids ||= docs.collect {|d| d['id']}
57
- end
58
-
59
- def unique_ids
60
- @unique_ids ||= docs.collect {|d| d['unique_id']}
61
- end
62
-
63
- # returns an array of all the docs
61
+ # Returns an array of all the docs
64
62
  def docs
65
63
  @docs ||= raw_response['response']['docs']
66
64
  end
67
65
 
68
- # helper for displaying a given field (first tries the highlight, then the stored value)
66
+ # Helper for displaying a given field (first tries the highlight, then the stored value)
69
67
  def display_for(doc, field)
70
68
  highlights_for(doc['unique_id'], field) || doc[field]
71
69
  end
72
70
 
73
- # returns the highlights for a given id for a given field
71
+ # Returns the highlights for a given id for a given field
74
72
  def highlights_for(unique_id, field)
75
73
  raw_response['highlighting'] ||= {}
76
74
  raw_response['highlighting'][unique_id] ||= {}
77
75
  raw_response['highlighting'][unique_id][field]
78
76
  end
79
77
 
80
- def suggestions
81
- @suggestions ||= raw_response['suggestions']
82
- end
83
-
84
- # returns the query time in ms
78
+ # Returns the query time in ms
85
79
  def qtime
86
80
  @qtime ||= raw_response['responseHeader']['QTime'].to_i
87
81
  end
88
82
 
89
- # returns the status code (0 for success)
83
+ # Returns the status code (0 for success)
90
84
  def status
91
85
  @status ||= raw_response['responseHeader']['status']
92
86
  end
93
87
 
94
- # returns the params hash
88
+ # Returns the params hash
95
89
  def params
96
90
  @params ||= raw_response['responseHeader']['params']
97
91
  end
98
92
 
99
- # returns the entire facet hash
93
+ # Returns the entire facet hash
100
94
  def facets
101
95
  @facets ||= raw_response['facet_counts'] || {}
102
96
  end
103
97
 
104
- # returns the hash of all the facet_fields (ie: {'instock_b' => ['true', 123, 'false', 20]}
98
+ # Returns the hash of all the facet_fields (ie: {'instock_b' => ['true', 123, 'false', 20]}
105
99
  def facet_fields
106
100
  @facet_fields ||= facets['facet_fields'] || {}
107
101
  end
108
102
 
103
+ # Returns all of the facet queries
109
104
  def facet_queries
110
105
  @facet_queries ||= facets['facet_queries'] || {}
111
106
  end
112
107
 
113
- # returns a hash of hashs rather than a hash of arrays (ie: {'instock_b' => {'true' => 123', 'false', => 20} })
108
+ # Returns a hash of hashes rather than a hash of arrays (ie: {'instock_b' => {'true' => 123, 'false' => 20}})
114
109
  def facet_fields_by_hash
115
110
  @facet_fields_by_hash ||= begin
116
111
  f = {}
@@ -128,12 +123,12 @@ module DelSolr
128
123
  end
129
124
  end
130
125
 
131
- # returns an array of value/counts for a given field (ie: ['true', 123, 'false', 20]
126
+ # Returns an array of value/counts for a given field (ie: ['true', 123, 'false', 20]
132
127
  def facet_field(field)
133
128
  facet_fields[field.to_s]
134
129
  end
135
130
 
136
- # returns the array of field values for the given field in the order they were returned from solr
131
+ # Returns the array of field values for the given field in the order they were returned from solr
137
132
  def facet_field_values(field)
138
133
  facet_field_values ||= {}
139
134
  facet_field_values[field.to_s] ||= begin
@@ -145,23 +140,23 @@ module DelSolr
145
140
  end
146
141
  end
147
142
 
148
- # returns a hash of value/counts for a given field (ie: {'true' => 123, 'false' => 20}
143
+ # Returns a hash of value/counts for a given field (ie: {'true' => 123, 'false' => 20}
149
144
  def facet_field_by_hash(field)
150
145
  facet_fields_by_hash(field.to_s)
151
146
  end
152
147
 
153
- # returns the count for the given field/value pair
148
+ # Returns the count for the given field/value pair
154
149
  def facet_field_count(field, value)
155
150
  facet_fields_by_hash[field.to_s][value.to_s] if facet_fields_by_hash[field.to_s]
156
151
  end
157
152
 
158
- # returns the counts for a given facet_query_name
153
+ # Returns the counts for a given facet_query_name
159
154
  def facet_query_count_by_name(facet_query_name)
160
155
  query_string = query_builder.facet_query_by_name(facet_query_name)
161
156
  facet_queries[query_string] if query_string
162
157
  end
163
158
 
164
- # returns the url send to solr
159
+ # Returns the url sent to solr
165
160
  def request_url
166
161
  query_builder.request_string
167
162
  end
@@ -2,7 +2,7 @@ module DelSolr
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 0
5
- TINY = 1
5
+ TINY = 2
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
data/lib/delsolr.rb CHANGED
@@ -6,6 +6,7 @@
6
6
  # see README.txt
7
7
  #
8
8
 
9
+
9
10
  require 'net/http'
10
11
 
11
12
  require 'digest/md5'
@@ -14,53 +15,136 @@ require File.join(File.dirname(__FILE__), 'delsolr', 'version')
14
15
  require File.join(File.dirname(__FILE__), 'delsolr', 'response')
15
16
  require File.join(File.dirname(__FILE__), 'delsolr', 'configuration')
16
17
  require File.join(File.dirname(__FILE__), 'delsolr', 'query_builder')
18
+ require File.join(File.dirname(__FILE__), 'delsolr', 'document')
17
19
  require File.join(File.dirname(__FILE__), 'delsolr', 'extensions')
18
20
 
21
+
19
22
  module DelSolr
20
23
 
21
24
  class Client
22
25
 
23
- attr_reader :configuration, :connection, :logger
26
+ attr_reader :configuration
24
27
 
25
- # options
26
- # :server - the server you want to connect to
27
- # :port - the port you want to connect to
28
- # :cache - [optional] a cache instance (any object the supports get and set)
29
- # :shortcuts - [options] a list of values in the doc fields that generate short cuts for (defaults to [:id, :unique_id, :score]).
30
- # With the response you will then be able to do rsp.scores and have it return an array of scores.
31
- def initialize(opts = {})
32
- @configuration = DelSolr::Client::Configuration.new(opts[:server], opts[:port])
33
- @cache = opts[:cache]
34
- @shortcuts = opts[:shortcuts]
28
+ #
29
+ # [<b><tt>:server</tt></b>]
30
+ # the server you want to connect to
31
+ #
32
+ # [<b><tt>:port</tt></b>]
33
+ # the port you want to connect to
34
+ #
35
+ # [<b><tt>:cache</tt></b>]
36
+ # (optional) a cache instance (any object that supports get and set)
37
+ #
38
+ # [<b><tt>:shortcuts</tt></b>]
39
+ # (optional) a list of doc fields to generate shortcuts for (ie: given [:score, :id] you will be able to call <tt>rsp.scores</tt> and have it return an array of scores, likewise for <tt>ids</tt>). Defaults to [:id, :unique_id, :score]
40
+ def initialize(options = {})
41
+ @configuration = DelSolr::Client::Configuration.new(options[:server], options[:port], options[:timeout])
42
+ @cache = options[:cache]
43
+ @shortcuts = options[:shortcuts]
35
44
  end
36
45
 
37
46
  #
38
47
  #
39
- # query_name - type of query to perform (should match up w/ queries defined in solrconfig.xml)
48
+ # <tt>request_handler</tt> - type of query to perform (should match up w/ request handlers defined in solrconfig.xml)
49
+ #
50
+ #
51
+ # options
52
+ #
53
+ # [<b><tt>:query</tt></b>]
54
+ # (required) effectively the 'q' param in the solr URL. The treatment of <tt>:query</tt> depends on the type
55
+ # of request handler you are using. The supported values are Strings and Hashes. Any valid Lucene query string is acceptable
56
+ # (ie: :query => "brand:apple" and :query => "apple" are both valid). If given a Hash delsolr will build the appropriate
57
+ # query string given a hash of fieldnames => values. For instance, the following two queries are effectively the
58
+ # same. Both will end up passing "brand:apple" as the 'q' param to solr.
59
+ #
60
+ # c.query('standard', :query => {:brand => 'apple'})
61
+ # c.query('standard', :query => "brand:apple")
62
+ #
63
+ # [<b><tt>:filters</tt></b>]
64
+ # (optional)array, string, or hash of additional filters to apply. Filters end up in the 'fq' param in the solr query URL.
65
+ # The value can be a String, Array of Strings, or Hash. The following are all equivalent.
66
+ #
67
+ # c.query('standard', :query => 'abc', :filters => {:instock => true})
68
+ # c.query('standard', :query => 'abc', :filters => "instock:true")
69
+ # c.query('standard', :query => 'abc', :filters => ["instock:true"])
70
+ #
71
+ # as are the following
72
+ #
73
+ # c.query('standard', :query => 'abc', :filters => {:instock => true, :onsale => true})
74
+ # c.query('standard', :query => 'abc', :filters => ["instock:true", "onsale:true"])
75
+ #
76
+ #
77
+ # [<b><tt>:facets</tt></b>]
78
+ # (optional) array of hashes for all the facet params (ie: {:field => 'instock_b', :limit => 15, :mincount => 5})
79
+ #
80
+ # <em>Faceting by field...</em>
81
+ #
82
+ # c.query('standard', :query => 'abc', :facets => [{:field => 'brand', :limit => 15, :mincount => 5}])
83
+ #
84
+ # ...will request counts for the 'brand' field name that have a minimum of 5 documents, returning
85
+ # a max/limit of 15. The counts for this facet can be pulled from the response like so:
86
+ #
87
+ # rsp.facet_field_count('brand', 'Apple') => 17 # returns count as fixnum
88
+ #
89
+ # The list of values for this facet can be pulled from the response like so:
90
+ #
91
+ # rsp.facet_field_values('brand') => ['Apple', 'Microsoft', 'Dell'] # returns an array of strings
92
+ #
93
+ # <em>Faceting by query...</em>
94
+ #
95
+ # c.query('standard', :query => 'abc',
96
+ # :facets => [{:query => {:city => 'seattle', :instock => true},
97
+ # :name => 'seattle_instock'}])
98
+ #
99
+ # ...will request counts for the number of documents where "seattle" matches on the "city" field and "instock" is set to true.
100
+ # Faceting by query requires you to assign a name to the facet so the counts can easily be fetched from the response. Solr
101
+ # resolves facet queries to counts by the actual facet query string, which can be cumbersome. The delsolr response object maintains
102
+ # a mapping of query name => query string for you so your application only needs to remember the query name.
103
+ #
104
+ # The count for this facet query can be pulled like so:
105
+ #
106
+ # rsp.facet_query_count_by_name('seattle_instock').
40
107
  #
108
+ # [<b><tt>:sorts</tt></b>]
109
+ # (optional) array or string of sorts in Lucene syntax (<fieldname> <asc/desc>)
110
+ #
111
+ # c.query('standard', :query => 'abc', :sort => "product_name asc")
112
+ #
113
+ #
114
+ # [<b><tt>:limit</tt></b>]
115
+ # (optional) number to return (defaults to 10). (becomes the 'rows' param in the solr URL)
116
+ #
117
+ # c.query('standard', :query => 'abc', :limit => 100)
41
118
  #
42
- # Possible options
43
- # :query - [required] unescape user input or solr query (can also be a hash {:field_name => value}).
44
- # :filters - [optional] array, string, or hash of additional filters to apply
45
- # :facet - [optional] array of hashes for all the facet params (ie: {:field => 'instock_b', :limit => 15, :mincount => 5})
46
- # You can also specify facets using a query (ie: {:query => 'city_idm:seattle', :name => 'seattle'} or even
47
- # {:query => {:city => 'seattle'}, :name => 'seattle'}) and then get counts for that facet by calling
48
- # rsp.facet_query_count_by_name('seattle').
49
- #
50
- # :sorts - [optional] array or string of sorts
51
- # :limit - [optional] number to return (defaults to 10)
52
- # :offset - [optional] offset (defaults to 0)
53
- # :enable_caching - [optional] switch to control whether or not to use the cache (for fetching or setting)
119
+ # [<b><tt>:offset</tt></b>]
120
+ # (optional) offset (defaults to 0, becomes the 'start' param in the solr URL)
121
+ #
122
+ # c.query('standard', :query => 'abc', :offset => 40)
123
+ #
124
+ # [<b><tt>:enable_caching</tt></b>]
125
+ # (optional) switch to control whether or not to use the cache (for fetching or setting) for the current query. Only works if a cache store was passed to the constructor.
126
+ #
127
+ # c = DelSolr::Client.new(:server => 'solr1', :port => 8983, :cache => SomeCacheStore.new)
128
+ # c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true)
129
+ # c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true) # this one should hit the cache
130
+ #
131
+ # Cache keys are created from MD5's of the solr URL that is generated.
132
+ #
133
+ # [<b><tt>:boost</tt></b>] becomes the 'bq' param which is used for query time boosting
134
+ # [<b><tt>:fields</tt></b>] becomes the 'fl' param which decides which fields to return. Defaults to 'id,unique_id,score'
135
+ #
136
+ # NOTE: Any unrecognized options will be passed along as URL params in the solr request URL. This allows you to access solr features
137
+ # which are unsupported by DelSolr.
54
138
  #
55
139
  # Returns a DelSolr::Client::Response instance
56
- def query(query_name, opts = {})
140
+ def query(request_handler, opts = {})
57
141
 
58
- raise "query_name must be supplied" if query_name.blank?
142
+ raise "request_handler must be supplied" if request_handler.blank?
59
143
 
60
144
  enable_caching = opts.delete(:enable_caching) && !@cache.nil?
61
145
  ttl = opts.delete(:ttl) || 1.hours
62
146
 
63
- query_builder = DelSolr::Client::QueryBuilder.new(query_name, opts)
147
+ query_builder = DelSolr::Client::QueryBuilder.new(request_handler, opts)
64
148
 
65
149
  # it's important that the QueryBuilder returns strings in a deterministic fashion
66
150
  # so that the cache keys will match for the same query.
@@ -74,12 +158,7 @@ module DelSolr
74
158
  end
75
159
 
76
160
  if body.blank? # cache miss (or wasn't enabled)
77
-
78
- # only bother to create the connection if we know we failed to hit the cache
79
- @connection ||= Net::HTTP.new(configuration.server, configuration.port)
80
- raise "Failed to connect to #{configuration.server}:#{configuration.port}" if @connection.nil?
81
-
82
- header, body = @connection.get(query_builder.request_string)
161
+ header, body = connection.get(query_builder.request_string)
83
162
 
84
163
  # add to the cache if caching
85
164
  if enable_caching
@@ -92,6 +171,95 @@ module DelSolr
92
171
 
93
172
  DelSolr::Client::Response.new(body, query_builder, :from_cache => from_cache, :shortcuts => @shortcuts)
94
173
  end
174
+
175
+ # Adds a document to the buffer to be posted to solr (NOTE: does not perform the actual post)
176
+ #
177
+ # [<b><tt>docs</tt></b>]
178
+ # docs must be a DelSolr::Document or array of instances. See DelSolr::Document for how to setup a document
179
+ def update(docs)
180
+ self.pending_documents.push(*Array(docs))
181
+ true
182
+ end
183
+
184
+ # Exactly like <tt>update</tt>, but performs the post immediately. Use <tt>update</tt> if you wish to batch document updates.
185
+ def update!(docs)
186
+ update(docs) && post_update!
187
+ end
188
+
189
+ # Calls <tt>update!</tt> on the docs and then <tt>commit!</tt>
190
+ def update_and_commit!(docs)
191
+ update!(docs) && commit!
192
+ end
193
+
194
+ # posts the buffer created by <tt>update</tt> to solr
195
+ def post_update!
196
+ h,b = post(prepare_update_xml())
197
+ success?(b)
198
+ end
199
+
200
+ # deletes <tt>unique_id</tt> from the index
201
+ def delete(unique_id)
202
+ h,b = post("<delete><id>#{unique_id}</id></delete>")
203
+ success?(b)
204
+ end
205
+
206
+ # not implemented
207
+ def delete_by_query(query)
208
+ raise 'not implemented yet :('
209
+ end
210
+
211
+ # commits all pending adds/deletes
212
+ def commit!
213
+ h,b = post("<commit/>")
214
+ success?(b)
215
+ end
216
+
217
+ # posts the optimize directive to solr
218
+ def optimize!
219
+ h,b = post("<optimize/>")
220
+ success?(b)
221
+ end
222
+
223
+ # accessor to the connection instance
224
+ def connection
225
+ @connection ||= begin
226
+ c = Net::HTTP.new(configuration.server, configuration.port)
227
+ c.read_timeout = configuration.timeout
228
+ raise "Failed to connect to #{configuration.server}:#{configuration.port}" if c.nil?
229
+ c
230
+ end
231
+ end
95
232
 
233
+ # clears out the connection so a new one will be created
234
+ def reset_connection!
235
+ @connection = nil
236
+ end
237
+
238
+ # returns the array of documents that are waiting to be posted to solr
239
+ def pending_documents
240
+ @pending_documents ||= []
241
+ end
242
+
243
+ private
244
+
245
+ # returns the update xml buffer
246
+ def prepare_update_xml
247
+ r = ["<add>\n"]
248
+ # copy and clear pending docs
249
+ working_docs, @pending_documents = @pending_documents, nil
250
+ working_docs.each { |doc| r << doc.xml }
251
+ r << "\n</add>\n"
252
+ r.join # not sure, but I think Array#join is faster than String#<< for large buffers
253
+ end
254
+
255
+ # helper for posting data to solr
256
+ def post(buffer)
257
+ connection.post('/solr/update', buffer, {'Content-type' => 'text/xml;charset=utf-8'})
258
+ end
259
+
260
+ def success?(response_body)
261
+ response_body == '<result status="0"></result>'
262
+ end
263
+
96
264
  end
97
265
  end
data/test/test_client.rb CHANGED
@@ -1,9 +1,119 @@
1
1
  require File.dirname(__FILE__) + '/test_helper'
2
+ require 'rubygems'
3
+ gem 'mocha', '=0.9.0'
4
+ require 'mocha'
2
5
 
3
6
  class ClientTest < Test::Unit::TestCase
4
7
 
5
8
  include Test::Unit::Assertions
6
9
 
10
+ SUCCESS = '<result status="0"></result>'
11
+ FAILURE = '<result status="1"></result>'
12
+ CONTENT_TYPE = {'Content-type' => 'text/xml;charset=utf-8'}
13
+
14
+ class TestCache
15
+ def set(k,v,t)
16
+ @cache ||= {}
17
+ @cache[k] = v
18
+ end
19
+
20
+ def get(k)
21
+ @cache ||= {}
22
+ @cache[k]
23
+ end
24
+ end
25
+
26
+ @@response_buffer = %{
27
+ {
28
+ 'responseHeader'=>{
29
+ 'status'=>0,
30
+ 'QTime'=>151,
31
+ 'params'=>{
32
+ 'wt'=>'ruby',
33
+ 'rows'=>'10',
34
+ 'explainOther'=>'',
35
+ 'start'=>'0',
36
+ 'hl.fl'=>'',
37
+ 'indent'=>'on',
38
+ 'hl'=>'on',
39
+ 'q'=>'index_type:widget',
40
+ 'fl'=>'*,score',
41
+ 'qt'=>'standard',
42
+ 'version'=>'2.2'}},
43
+ 'response'=>{'numFound'=>1522698,'start'=>0,'maxScore'=>1.5583541,'docs'=>[
44
+ {
45
+ 'index_type'=>'widget',
46
+ 'id'=>1,
47
+ 'unique_id'=>'1_widget',
48
+ 'score'=>1.5583541},
49
+ {
50
+ 'index_type'=>'widget',
51
+ 'id'=>3,
52
+ 'unique_id'=>'3_widget',
53
+ 'score'=>1.5583541},
54
+ {
55
+ 'index_type'=>'widget',
56
+ 'id'=>4,
57
+ 'unique_id'=>'4_widget',
58
+ 'score'=>1.5583541},
59
+ {
60
+ 'index_type'=>'widget',
61
+ 'id'=>5,
62
+ 'unique_id'=>'5_widget',
63
+ 'score'=>1.5583541},
64
+ {
65
+ 'index_type'=>'widget',
66
+ 'id'=>7,
67
+ 'unique_id'=>'7_widget',
68
+ 'score'=>1.5583541},
69
+ {
70
+ 'index_type'=>'widget',
71
+ 'id'=>8,
72
+ 'unique_id'=>'8_widget',
73
+ 'score'=>1.5583541},
74
+ {
75
+ 'index_type'=>'widget',
76
+ 'id'=>9,
77
+ 'unique_id'=>'9_widget',
78
+ 'score'=>1.5583541},
79
+ {
80
+ 'index_type'=>'widget',
81
+ 'id'=>10,
82
+ 'unique_id'=>'10_widget',
83
+ 'score'=>1.5583541},
84
+ {
85
+ 'index_type'=>'widget',
86
+ 'id'=>11,
87
+ 'unique_id'=>'11_widget',
88
+ 'score'=>1.5583541},
89
+ {
90
+ 'index_type'=>'widget',
91
+ 'id'=>12,
92
+ 'unique_id'=>'12_widget',
93
+ 'score'=>1.5583541}]
94
+ },
95
+ 'facet_counts'=>{
96
+ 'facet_queries'=>{
97
+ 'city_idm:19596' => 392},
98
+ 'facet_fields'=>{
99
+ 'available_b'=>[
100
+ 'false',1328],
101
+ 'onsale_b'=>[
102
+ 'false',1182,
103
+ 'true',174]}},
104
+ 'highlighting'=>{
105
+ '1_widget'=>{},
106
+ '3_widget'=>{},
107
+ '4_widget'=>{},
108
+ '5_widget'=>{},
109
+ '7_widget'=>{},
110
+ '8_widget'=>{},
111
+ '9_widget'=>{},
112
+ '10_widget'=>{},
113
+ '11_widget'=>{},
114
+ '12_widget'=>{}}}
115
+ }
116
+
7
117
  def test_create
8
118
  s = nil
9
119
  assert_nothing_raised do
@@ -12,4 +122,96 @@ class ClientTest < Test::Unit::TestCase
12
122
  assert(s)
13
123
  end
14
124
 
125
+ def test_commit_success
126
+ c = setup_client
127
+ c.connection.expects(:post).once.returns([nil,SUCCESS])
128
+ assert(c.commit!)
129
+ end
130
+
131
+ def test_commit_failure
132
+ c = setup_client
133
+ c.connection.expects(:post).once.returns([nil,FAILURE])
134
+ assert(!c.commit!)
135
+ end
136
+
137
+ def test_optimize_success
138
+ c = setup_client
139
+ c.connection.expects(:post).once.returns([nil,SUCCESS])
140
+ assert(c.optimize!)
141
+ end
142
+
143
+ def test_optimize_failure
144
+ c = setup_client
145
+ c.connection.expects(:post).once.returns([nil,FAILURE])
146
+ assert(!c.optimize!)
147
+ end
148
+
149
+ def test_update
150
+ c = setup_client
151
+
152
+ doc = DelSolr::Document.new
153
+ doc.add_field(:id, 123)
154
+ doc.add_field(:name, 'mp3 player')
155
+
156
+ expected_post_data = "<add>\n#{doc.xml}\n</add>\n"
157
+
158
+ assert(c.update(doc))
159
+ assert_equal(1, c.pending_documents.length)
160
+
161
+ c.connection.expects(:post).with('/solr/update', expected_post_data, CONTENT_TYPE).returns([nil,SUCCESS])
162
+ assert(c.post_update!)
163
+ assert_equal(0, c.pending_documents.length)
164
+ end
165
+
166
+ def test_update!
167
+ c = setup_client
168
+
169
+ doc = DelSolr::Document.new
170
+ doc.add_field(:id, 123)
171
+ doc.add_field(:name, 'mp3 player')
172
+
173
+ expected_post_data = "<add>\n#{doc.xml}\n</add>\n"
174
+
175
+ c.connection.expects(:post).with('/solr/update', expected_post_data, CONTENT_TYPE).returns([nil,SUCCESS])
176
+ assert(c.update!(doc))
177
+ assert_equal(0, c.pending_documents.length)
178
+ end
179
+
180
+ def test_query
181
+ c = setup_client
182
+
183
+ mock_query_builder = DelSolr::Client::QueryBuilder
184
+ mock_query_builder.stubs(:request_string).returns('/solr/select?some_query') # mock the query builder
185
+ DelSolr::Client::QueryBuilder.stubs(:new).returns(mock_query_builder)
186
+ c.connection.expects(:get).with(mock_query_builder.request_string).returns([nil, @@response_buffer]) # mock the connection
187
+ r = c.query('standard', :query => '123')
188
+ assert(r)
189
+ assert_equal([1,3,4,5,7,8,9,10,11,12], r.ids.sort)
190
+ assert(!r.from_cache?, 'should not be from cache')
191
+ end
192
+
193
+ def test_query_from_cache
194
+ c = setup_client(:cache => TestCache.new)
195
+
196
+ mock_query_builder = DelSolr::Client::QueryBuilder
197
+ mock_query_builder.stubs(:request_string).returns('/solr/select?some_query') # mock the query builder
198
+ DelSolr::Client::QueryBuilder.stubs(:new).returns(mock_query_builder)
199
+ c.connection.expects(:get).with(mock_query_builder.request_string).returns([nil, @@response_buffer]) # mock the connection
200
+ r = c.query('standard', :query => '123', :enable_caching => true)
201
+ assert(r)
202
+ assert_equal([1,3,4,5,7,8,9,10,11,12], r.ids.sort)
203
+ assert(!r.from_cache?, 'should not be from cache')
204
+
205
+ r = c.query('standard', :query => '123', :enable_caching => true)
206
+ assert(r)
207
+ assert_equal([1,3,4,5,7,8,9,10,11,12], r.ids.sort)
208
+ assert(r.from_cache?, 'this one should be from the cache')
209
+ end
210
+
211
+ private
212
+
213
+ def setup_client(options = {})
214
+ DelSolr::Client.new({:server => 'localhost', :port => 8983}.merge(options))
215
+ end
216
+
15
217
  end
@@ -0,0 +1,29 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class DocumentTest < Test::Unit::TestCase
4
+
5
+ include Test::Unit::Assertions
6
+
7
+ def test_create
8
+ d = DelSolr::Document.new
9
+ assert(d)
10
+
11
+ d.add_field('person_name', 'John Smith')
12
+
13
+ buf = "<doc>\n<field name=\"person_name\">John Smith</field>\n</doc>"
14
+
15
+ assert_equal(buf, d.xml)
16
+ end
17
+
18
+ def test_cdata
19
+ d = DelSolr::Document.new
20
+ assert(d)
21
+
22
+ d.add_field('person_name', 'John Smith', :cdata => true)
23
+
24
+ buf = "<doc>\n<field name=\"person_name\"><![CDATA[John Smith]]></field>\n</doc>"
25
+
26
+ assert_equal(buf, d.xml)
27
+ end
28
+
29
+ end
@@ -89,7 +89,7 @@ class QueryBuilderTest < Test::Unit::TestCase
89
89
  assert_equal(p['q'], 'index_type:books')
90
90
  end
91
91
 
92
- def test_facets
92
+ def test_facets_001
93
93
  qb = nil
94
94
  opts = {}
95
95
  opts[:query] = "games"
@@ -106,7 +106,7 @@ class QueryBuilderTest < Test::Unit::TestCase
106
106
  assert_equal(p['f.on_sale_b.facet.limit'], '1')
107
107
  end
108
108
 
109
- def test_facets
109
+ def test_facets_002
110
110
  qb = nil
111
111
  opts = {}
112
112
  opts[:query] = "games"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: delsolr
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben VandenBos
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-09-08 00:00:00 -07:00
12
+ date: 2008-10-22 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -45,6 +45,7 @@ files:
45
45
  - lib/delsolr/extensions.rb
46
46
  - lib/delsolr/query_builder.rb
47
47
  - lib/delsolr/response.rb
48
+ - lib/delsolr/document.rb
48
49
  - lib/delsolr/version.rb
49
50
  has_rdoc: true
50
51
  homepage: http://delsolr.rubyforge.org
@@ -84,4 +85,5 @@ test_files:
84
85
  - test/test_response.rb
85
86
  - test/test_query_builder.rb
86
87
  - test/test_helper.rb
88
+ - test/test_document.rb
87
89
  - test/test_client.rb