delsolr 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +2 -0
- data/README.txt +36 -1
- data/lib/delsolr/configuration.rb +3 -2
- data/lib/delsolr/document.rb +82 -0
- data/lib/delsolr/query_builder.rb +7 -5
- data/lib/delsolr/response.rb +28 -33
- data/lib/delsolr/version.rb +1 -1
- data/lib/delsolr.rb +201 -33
- data/test/test_client.rb +202 -0
- data/test/test_document.rb +29 -0
- data/test/test_query_builder.rb +2 -2
- metadata +4 -2
data/Manifest.txt
CHANGED
data/README.txt
CHANGED
@@ -15,6 +15,30 @@ while keeping the interface as ruby-esque as possible.
|
|
15
15
|
|
16
16
|
See http://delsolr.rubyforge.org for more info
|
17
17
|
|
18
|
+
Example:
|
19
|
+
|
20
|
+
c = DelSolr::Client.new(:server => 'solr1', :port => 8983)
|
21
|
+
rsp = c.query('dismax', :query => 'mp3 player',
|
22
|
+
:filters => {:cost => (50..100)},
|
23
|
+
:facets => [{:field => 'brand', :limit => 10},
|
24
|
+
{:query => {:onsale => true, :brand => 'Apple'},
|
25
|
+
:name => 'cheap_apple'}])
|
26
|
+
|
27
|
+
# output total matches
|
28
|
+
puts rsp.total
|
29
|
+
|
30
|
+
# output each id with score
|
31
|
+
rsp.docs.each { |doc| puts "#{doc[:id]} - #{doc[:score]}" }
|
32
|
+
|
33
|
+
# output each value for a facet
|
34
|
+
rsp.facet_field_values('brand').each do |brand|
|
35
|
+
puts "#{brand}: #{rsp.facet_field_count('brand', brand)}"
|
36
|
+
end
|
37
|
+
|
38
|
+
# output a query facet
|
39
|
+
puts "Cheap Apple stuff: #{rsp.facet_query_count_by_name('cheap_apple')}"
|
40
|
+
|
41
|
+
|
18
42
|
== REQUIREMENTS:
|
19
43
|
|
20
44
|
You need Solr installed somewhere so you can query it ;)
|
@@ -23,11 +47,22 @@ You need Solr installed somewhere so you can query it ;)
|
|
23
47
|
|
24
48
|
sudo gem install delsolr
|
25
49
|
|
50
|
+
== TODO:
|
51
|
+
|
52
|
+
* finish unit tests (use mocha to stub out Net::HTTP)
|
53
|
+
* implement delete_by_query
|
54
|
+
* make thread safe
|
55
|
+
* it would be nice to be able to have things like commit/optimize be run in threads on timers periodically
|
56
|
+
* right now a few things need to be locked
|
57
|
+
* connection
|
58
|
+
* pending_documents array
|
59
|
+
|
60
|
+
|
26
61
|
== LICENSE:
|
27
62
|
|
28
63
|
(The MIT License)
|
29
64
|
|
30
|
-
Copyright (c) 2008
|
65
|
+
Copyright (c) 2008 Avvo, INC - http://www.avvo.com
|
31
66
|
|
32
67
|
Permission is hereby granted, free of charge, to any person obtaining
|
33
68
|
a copy of this software and associated documentation files (the
|
@@ -1,11 +1,12 @@
|
|
1
1
|
module DelSolr
|
2
2
|
class Client
|
3
3
|
class Configuration
|
4
|
-
attr_accessor :server, :port
|
4
|
+
attr_accessor :server, :port, :timeout
|
5
5
|
|
6
|
-
def initialize(server, port)
|
6
|
+
def initialize(server, port, timeout = 120)
|
7
7
|
@server = server
|
8
8
|
@port = port.to_i
|
9
|
+
@timeout = timeout || 120
|
9
10
|
end
|
10
11
|
|
11
12
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module DelSolr
|
2
|
+
#
|
3
|
+
# DelSolr::Client::Document
|
4
|
+
#
|
5
|
+
# Generally, indexing consists of iterating over your database, creating a huge xml buffer, and
|
6
|
+
# posting it to solr. This wraps up the xml portion and DelSolr::Client wraps up the posting/batching
|
7
|
+
# portions.
|
8
|
+
#
|
9
|
+
# This is what your indexing logic might look like if you're dealing w/ ActiveRecord objects:
|
10
|
+
#
|
11
|
+
# client = DelSolr::Client.new(:server => 'solr1', :port => 8983, :timeout => 500)
|
12
|
+
# models = SomeModel.find(:all)
|
13
|
+
# models.each do |model|
|
14
|
+
# doc = DelSolr::Document.new
|
15
|
+
# doc.add_field('id', model.id)
|
16
|
+
# doc.add_field('name', model.name)
|
17
|
+
# model.tags.each do |tag| # multiple tag fields
|
18
|
+
# doc.add_field('tag', tag.name)
|
19
|
+
# end
|
20
|
+
# client.update(doc) # batch the document update
|
21
|
+
# end
|
22
|
+
# client.post_update! # send the batch to solr
|
23
|
+
# client.commit! # send the commit so solr updates the index
|
24
|
+
#
|
25
|
+
# It's generally a good idea to experiment with different batch sizes. 500-2000 documents per post
|
26
|
+
# is a good starting point depending on how large your documents are.
|
27
|
+
#
|
28
|
+
# You may also want to update just a single document when it is changed. That might look like this:
|
29
|
+
#
|
30
|
+
# def after_save
|
31
|
+
# doc = DelSolr::Document.new
|
32
|
+
# doc.add_field('id', model.id)
|
33
|
+
# doc.add_field('name', model.name)
|
34
|
+
# $client.update_and_commit!(doc) # post the document and immediately post the commit
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
#
|
38
|
+
class Document
|
39
|
+
|
40
|
+
# [<b><tt>field_name</tt></b>]
|
41
|
+
# is the name of the field in your schema.xml
|
42
|
+
# [<b><tt>value</tt></b>]
|
43
|
+
# is the value of the field you wish to be indexed
|
44
|
+
# [<b><tt>options</tt></b>]
|
45
|
+
# <b><tt>:cdata</tt></b> set to true if you want the value wrapped in a CDATA tag
|
46
|
+
#
|
47
|
+
# All other options are passed directly as xml attributes (see the solr documentation on usage)
|
48
|
+
def add_field(field_name, value, options = {})
|
49
|
+
field_buffer << construct_field_tag(field_name, value, options)
|
50
|
+
end
|
51
|
+
|
52
|
+
def xml
|
53
|
+
"<doc>\n" + field_buffer + "</doc>"
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# creates xml field for given inputs
|
59
|
+
def construct_field_tag(name, value, options={})
|
60
|
+
options[:name] = name.to_s
|
61
|
+
use_cdata = options.delete(:cdata)
|
62
|
+
opts = []
|
63
|
+
options.each do |k,v|
|
64
|
+
opts.push "#{k}=\"#{v}\""
|
65
|
+
end
|
66
|
+
opts = opts.join(" ")
|
67
|
+
opts = " " + opts if opts
|
68
|
+
|
69
|
+
return "<field#{opts}>#{use_cdata ? cdata(value) : value}</field>\n"
|
70
|
+
end
|
71
|
+
|
72
|
+
def cdata(str)
|
73
|
+
"<![CDATA[#{str}]]>"
|
74
|
+
end
|
75
|
+
|
76
|
+
def field_buffer
|
77
|
+
@buffer ||= ""
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
@@ -6,6 +6,8 @@ module DelSolr
|
|
6
6
|
|
7
7
|
class QueryBuilder
|
8
8
|
|
9
|
+
FL_DEFAULTS = 'id,unique_id,score' # redefine if you really want to change this.
|
10
|
+
|
9
11
|
attr_accessor :query_name, :options
|
10
12
|
|
11
13
|
# ops can basically be straight solr URL params, but it also supports some other formats
|
@@ -19,19 +21,19 @@ module DelSolr
|
|
19
21
|
def request_string
|
20
22
|
@request_string ||= build_request_string
|
21
23
|
end
|
22
|
-
|
24
|
+
|
23
25
|
# returns the query string of the facet query for the given query name (used for resolving counts for given queries)
|
24
26
|
def facet_query_by_name(query_name)
|
25
27
|
name_to_facet_query[query_name]
|
26
28
|
end
|
27
29
|
|
28
30
|
private
|
29
|
-
|
31
|
+
|
30
32
|
def build_request_string()
|
31
33
|
raise "query_name must be set" if query_name.blank?
|
32
|
-
|
34
|
+
|
33
35
|
opts = self.options.dup
|
34
|
-
|
36
|
+
|
35
37
|
# cleanup the nils
|
36
38
|
opts.delete_if {|k,v| v.nil?}
|
37
39
|
|
@@ -39,7 +41,7 @@ module DelSolr
|
|
39
41
|
opts[:q] ||= opts[:query]
|
40
42
|
opts[:rows] ||= opts[:limit] || 10
|
41
43
|
opts[:start] ||= opts[:offset] || 0
|
42
|
-
opts[:fl] ||= opts[:fields] ||
|
44
|
+
opts[:fl] ||= opts[:fields] || FL_DEFAULTS
|
43
45
|
opts[:bq] ||= opts[:boost]
|
44
46
|
opts[:suggestionCount] ||= opts[:suggestion_count]
|
45
47
|
opts[:onlyMorePopular] ||= opts[:only_more_popular]
|
data/lib/delsolr/response.rb
CHANGED
@@ -4,7 +4,7 @@ module DelSolr
|
|
4
4
|
|
5
5
|
class Response
|
6
6
|
|
7
|
-
attr_reader :
|
7
|
+
attr_reader :query_builder
|
8
8
|
|
9
9
|
def initialize(solr_response_buffer, query_builder, options = {})
|
10
10
|
@query_builder = query_builder
|
@@ -26,91 +26,86 @@ module DelSolr
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
-
#
|
29
|
+
# Returns the "raw" ruby hash that is returned by the solr ruby response writer. This is mostly for debugging purposes
|
30
|
+
def raw_response
|
31
|
+
@raw_response
|
32
|
+
end
|
33
|
+
|
34
|
+
# Returns the total number of matches
|
30
35
|
def total
|
31
36
|
@total ||= raw_response['response']['numFound']
|
32
37
|
end
|
33
38
|
|
39
|
+
# Returns true if there are no results
|
34
40
|
def blank?
|
35
|
-
total
|
41
|
+
total.zero?
|
36
42
|
end
|
37
43
|
|
38
44
|
alias_method :empty?, :blank?
|
39
45
|
|
46
|
+
# Returns true if this response was pulled from the cache
|
40
47
|
def from_cache?
|
41
48
|
@from_cache
|
42
49
|
end
|
43
50
|
|
44
|
-
#
|
51
|
+
# Returns the offset that was given in the request
|
45
52
|
def offset
|
46
53
|
@offset ||= raw_response['response']['start']
|
47
54
|
end
|
48
55
|
|
49
|
-
#
|
56
|
+
# Returns the max score of the result set
|
50
57
|
def max_score
|
51
58
|
@max_score ||= raw_response['response']['maxScore'].to_f
|
52
59
|
end
|
53
60
|
|
54
|
-
#
|
55
|
-
def ids
|
56
|
-
@ids ||= docs.collect {|d| d['id']}
|
57
|
-
end
|
58
|
-
|
59
|
-
def unique_ids
|
60
|
-
@unique_ids ||= docs.collect {|d| d['unique_id']}
|
61
|
-
end
|
62
|
-
|
63
|
-
# returns an array of all the docs
|
61
|
+
# Returns an array of all the docs
|
64
62
|
def docs
|
65
63
|
@docs ||= raw_response['response']['docs']
|
66
64
|
end
|
67
65
|
|
68
|
-
#
|
66
|
+
# Helper for displaying a given field (first tries the highlight, then the stored value)
|
69
67
|
def display_for(doc, field)
|
70
68
|
highlights_for(doc['unique_id'], field) || doc[field]
|
71
69
|
end
|
72
70
|
|
73
|
-
#
|
71
|
+
# Returns the highlights for a given id for a given field
|
74
72
|
def highlights_for(unique_id, field)
|
75
73
|
raw_response['highlighting'] ||= {}
|
76
74
|
raw_response['highlighting'][unique_id] ||= {}
|
77
75
|
raw_response['highlighting'][unique_id][field]
|
78
76
|
end
|
79
77
|
|
80
|
-
|
81
|
-
@suggestions ||= raw_response['suggestions']
|
82
|
-
end
|
83
|
-
|
84
|
-
# returns the query time in ms
|
78
|
+
# Returns the query time in ms
|
85
79
|
def qtime
|
86
80
|
@qtime ||= raw_response['responseHeader']['QTime'].to_i
|
87
81
|
end
|
88
82
|
|
89
|
-
#
|
83
|
+
# Returns the status code (0 for success)
|
90
84
|
def status
|
91
85
|
@status ||= raw_response['responseHeader']['status']
|
92
86
|
end
|
93
87
|
|
94
|
-
#
|
88
|
+
# Returns the params hash
|
95
89
|
def params
|
96
90
|
@params ||= raw_response['responseHeader']['params']
|
97
91
|
end
|
98
92
|
|
99
|
-
#
|
93
|
+
# Returns the entire facet hash
|
100
94
|
def facets
|
101
95
|
@facets ||= raw_response['facet_counts'] || {}
|
102
96
|
end
|
103
97
|
|
104
|
-
#
|
98
|
+
# Returns the hash of all the facet_fields (ie: {'instock_b' => ['true', 123, 'false', 20]})
|
105
99
|
def facet_fields
|
106
100
|
@facet_fields ||= facets['facet_fields'] || {}
|
107
101
|
end
|
108
102
|
|
103
|
+
# Returns all of the facet queries
|
109
104
|
def facet_queries
|
110
105
|
@facet_queries ||= facets['facet_queries'] || {}
|
111
106
|
end
|
112
107
|
|
113
|
-
#
|
108
|
+
# Returns a hash of hashes rather than a hash of arrays (ie: {'instock_b' => {'true' => 123, 'false' => 20} })
|
114
109
|
def facet_fields_by_hash
|
115
110
|
@facet_fields_by_hash ||= begin
|
116
111
|
f = {}
|
@@ -128,12 +123,12 @@ module DelSolr
|
|
128
123
|
end
|
129
124
|
end
|
130
125
|
|
131
|
-
#
|
126
|
+
# Returns an array of value/counts for a given field (ie: ['true', 123, 'false', 20])
|
132
127
|
def facet_field(field)
|
133
128
|
facet_fields[field.to_s]
|
134
129
|
end
|
135
130
|
|
136
|
-
#
|
131
|
+
# Returns the array of field values for the given field in the order they were returned from solr
|
137
132
|
def facet_field_values(field)
|
138
133
|
facet_field_values ||= {}
|
139
134
|
facet_field_values[field.to_s] ||= begin
|
@@ -145,23 +140,23 @@ module DelSolr
|
|
145
140
|
end
|
146
141
|
end
|
147
142
|
|
148
|
-
#
|
143
|
+
# Returns a hash of value/counts for a given field (ie: {'true' => 123, 'false' => 20})
|
149
144
|
def facet_field_by_hash(field)
|
150
145
|
facet_fields_by_hash(field.to_s)
|
151
146
|
end
|
152
147
|
|
153
|
-
#
|
148
|
+
# Returns the count for the given field/value pair
|
154
149
|
def facet_field_count(field, value)
|
155
150
|
facet_fields_by_hash[field.to_s][value.to_s] if facet_fields_by_hash[field.to_s]
|
156
151
|
end
|
157
152
|
|
158
|
-
#
|
153
|
+
# Returns the counts for a given facet_query_name
|
159
154
|
def facet_query_count_by_name(facet_query_name)
|
160
155
|
query_string = query_builder.facet_query_by_name(facet_query_name)
|
161
156
|
facet_queries[query_string] if query_string
|
162
157
|
end
|
163
158
|
|
164
|
-
#
|
159
|
+
# Returns the url sent to solr
|
165
160
|
def request_url
|
166
161
|
query_builder.request_string
|
167
162
|
end
|
data/lib/delsolr/version.rb
CHANGED
data/lib/delsolr.rb
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
# see README.txt
|
7
7
|
#
|
8
8
|
|
9
|
+
|
9
10
|
require 'net/http'
|
10
11
|
|
11
12
|
require 'digest/md5'
|
@@ -14,53 +15,136 @@ require File.join(File.dirname(__FILE__), 'delsolr', 'version')
|
|
14
15
|
require File.join(File.dirname(__FILE__), 'delsolr', 'response')
|
15
16
|
require File.join(File.dirname(__FILE__), 'delsolr', 'configuration')
|
16
17
|
require File.join(File.dirname(__FILE__), 'delsolr', 'query_builder')
|
18
|
+
require File.join(File.dirname(__FILE__), 'delsolr', 'document')
|
17
19
|
require File.join(File.dirname(__FILE__), 'delsolr', 'extensions')
|
18
20
|
|
21
|
+
|
19
22
|
module DelSolr
|
20
23
|
|
21
24
|
class Client
|
22
25
|
|
23
|
-
attr_reader :configuration
|
26
|
+
attr_reader :configuration
|
24
27
|
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
28
|
+
#
|
29
|
+
# [<b><tt>:server</tt></b>]
|
30
|
+
# the server you want to connect to
|
31
|
+
#
|
32
|
+
# [<b><tt>:port</tt></b>]
|
33
|
+
# the port you want to connect to
|
34
|
+
#
|
35
|
+
# [<b><tt>:cache</tt></b>]
|
36
|
+
# (optional) a cache instance (any object that supports get and set)
|
37
|
+
#
|
38
|
+
# [<b><tt>:shortcuts</tt></b>]
|
39
|
+
# (optional) a list of values in the doc fields to generate shortcuts for (ie: given [:score, :id], you will be able to call <tt>rsp.scores</tt> and have it return an array of scores, likewise for <tt>ids</tt>). Defaults to [:id, :unique_id, :score]
|
40
|
+
def initialize(options = {})
|
41
|
+
@configuration = DelSolr::Client::Configuration.new(options[:server], options[:port], options[:timeout])
|
42
|
+
@cache = options[:cache]
|
43
|
+
@shortcuts = options[:shortcuts]
|
35
44
|
end
|
36
45
|
|
37
46
|
#
|
38
47
|
#
|
39
|
-
#
|
48
|
+
# <tt>request_handler</tt> - type of query to perform (should match up w/ request handlers defined in solrconfig.xml)
|
49
|
+
#
|
50
|
+
#
|
51
|
+
# options
|
52
|
+
#
|
53
|
+
# [<b><tt>:query</tt></b>]
|
54
|
+
# (required) effectively the 'q' param in the solr URL. The treatment of <tt>:query</tt> depends on the type
|
55
|
+
# of request handler you are using. The supported values are Strings and Hashes. Any valid Lucene query string is acceptable
|
56
|
+
# (ie: :query => "brand:apple" and :query => "apple" are both valid). If given a Hash, delsolr will build the appropriate
|
57
|
+
# query string given a hash of fieldnames => values. For instance, the following two queries are effectively the
|
58
|
+
# same. Both will end up passing "brand:apple" as the 'q' param to solr.
|
59
|
+
#
|
60
|
+
# c.query('standard', :query => {:brand => 'apple'})
|
61
|
+
# c.query('standard', :query => "brand:apple")
|
62
|
+
#
|
63
|
+
# [<b><tt>:filters</tt></b>]
|
64
|
+
# (optional) array, string, or hash of additional filters to apply. Filters end up in the 'fq' param in the solr query URL.
|
65
|
+
# The value can be a String, Array of Strings, or Hash. The following are all equivalent.
|
66
|
+
#
|
67
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true})
|
68
|
+
# c.query('standard', :query => 'abc', :filters => "instock:true")
|
69
|
+
# c.query('standard', :query => 'abc', :filters => ["instock:true"])
|
70
|
+
#
|
71
|
+
# as are the following
|
72
|
+
#
|
73
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true, :onsale => true})
|
74
|
+
# c.query('standard', :query => 'abc', :filters => ["instock:true", "onsale:true"])
|
75
|
+
#
|
76
|
+
#
|
77
|
+
# [<b><tt>:facets</tt></b>]
|
78
|
+
# (optional) array of hashes for all the facet params (ie: {:field => 'instock_b', :limit => 15, :mincount => 5})
|
79
|
+
#
|
80
|
+
# <em>Faceting by field...</em>
|
81
|
+
#
|
82
|
+
# c.query('standard', :query => 'abc', :facets => [{:field => 'brand', :limit => 15, :mincount => 5}])
|
83
|
+
#
|
84
|
+
# ...will request counts for the 'brand' field name that have a minimum of 5 documents, returning
|
85
|
+
# a max/limit of 15. The counts for this facet can be pulled from the response like so:
|
86
|
+
#
|
87
|
+
# rsp.facet_field_count('brand', 'Apple') => 17 # returns count as fixnum
|
88
|
+
#
|
89
|
+
# The list of values for this facet can be pulled from the response like so:
|
90
|
+
#
|
91
|
+
# rsp.facet_field_values('brand') => ['Apple', 'Microsoft', 'Dell'] # returns an array of strings
|
92
|
+
#
|
93
|
+
# <em>Faceting by query...</em>
|
94
|
+
#
|
95
|
+
# c.query('standard', :query => 'abc',
|
96
|
+
# :facets => [{:query => {:city => 'seattle', :instock => true},
|
97
|
+
# :name => 'seattle_instock'}])
|
98
|
+
#
|
99
|
+
# ...will request counts for the number of documents where "seattle" matches on the "city" field and "instock" is set to true.
|
100
|
+
# Faceting by query requires you to assign a name to the facet so the counts can easily be fetched from the response. Solr
|
101
|
+
# resolves facet queries to count by the actual facet query string, which can be cumbersome. The delsolr response object maintains
|
102
|
+
# a mapping of query name => query string for you so your application only needs to remember the query name.
|
103
|
+
#
|
104
|
+
# The count for this facet query can be pulled like so:
|
105
|
+
#
|
106
|
+
# rsp.facet_query_count_by_name('seattle_instock').
|
40
107
|
#
|
108
|
+
# [<b><tt>:sorts</tt></b>]
|
109
|
+
# (optional) array or string of sorts in Lucene syntax (<fieldname> <asc/desc>)
|
110
|
+
#
|
111
|
+
# c.query('standard', :query => 'abc', :sort => "product_name asc")
|
112
|
+
#
|
113
|
+
#
|
114
|
+
# [<b><tt>:limit</tt></b>]
|
115
|
+
# (optional) number to return (defaults to 10). (becomes the 'rows' param in the solr URL)
|
116
|
+
#
|
117
|
+
# c.query('standard', :query => 'abc', :limit => 100)
|
41
118
|
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
50
|
-
# :
|
51
|
-
# :
|
52
|
-
# :
|
53
|
-
#
|
119
|
+
# [<b><tt>:offset</tt></b>]
|
120
|
+
# (optional) offset (defaults to 0, becomes the 'start' param in the solr URL)
|
121
|
+
#
|
122
|
+
# c.query('standard', :query => 'abc', :offset => 40)
|
123
|
+
#
|
124
|
+
# [<b><tt>:enable_caching</tt></b>]
|
125
|
+
# (optional) switch to control whether or not to use the cache (for fetching or setting) for the current query. Only works if a cache store was passed to the constructor.
|
126
|
+
#
|
127
|
+
# c = DelSolr::Client.new(:server => 'solr1', :port => 8983, :cache => SomeCacheStore.new)
|
128
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true)
|
129
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true) # this one should hit the cache
|
130
|
+
#
|
131
|
+
# Cache keys are created from MD5's of the solr URL that is generated.
|
132
|
+
#
|
133
|
+
# [<b><tt>:boost</tt></b>] becomes the 'bq' param which is used for query time boosting
|
134
|
+
# [<b><tt>:fields</tt></b>] becomes the 'fl' param which decides which fields to return. Defaults to 'id,unique_id,score'
|
135
|
+
#
|
136
|
+
# NOTE: Any unrecognized options will be passed along as URL params in the solr request URL. This allows you to access solr features
|
137
|
+
# which are unsupported by DelSolr.
|
54
138
|
#
|
55
139
|
# Returns a DelSolr::Client::Response instance
|
56
|
-
def query(
|
140
|
+
def query(request_handler, opts = {})
|
57
141
|
|
58
|
-
raise "
|
142
|
+
raise "request_handler must be supplied" if request_handler.blank?
|
59
143
|
|
60
144
|
enable_caching = opts.delete(:enable_caching) && !@cache.nil?
|
61
145
|
ttl = opts.delete(:ttl) || 1.hours
|
62
146
|
|
63
|
-
query_builder = DelSolr::Client::QueryBuilder.new(
|
147
|
+
query_builder = DelSolr::Client::QueryBuilder.new(request_handler, opts)
|
64
148
|
|
65
149
|
# it's important that the QueryBuilder returns strings in a deterministic fashion
|
66
150
|
# so that the cache keys will match for the same query.
|
@@ -74,12 +158,7 @@ module DelSolr
|
|
74
158
|
end
|
75
159
|
|
76
160
|
if body.blank? # cache miss (or wasn't enabled)
|
77
|
-
|
78
|
-
# only bother to create the connection if we know we failed to hit the cache
|
79
|
-
@connection ||= Net::HTTP.new(configuration.server, configuration.port)
|
80
|
-
raise "Failed to connect to #{configuration.server}:#{configuration.port}" if @connection.nil?
|
81
|
-
|
82
|
-
header, body = @connection.get(query_builder.request_string)
|
161
|
+
header, body = connection.get(query_builder.request_string)
|
83
162
|
|
84
163
|
# add to the cache if caching
|
85
164
|
if enable_caching
|
@@ -92,6 +171,95 @@ module DelSolr
|
|
92
171
|
|
93
172
|
DelSolr::Client::Response.new(body, query_builder, :from_cache => from_cache, :shortcuts => @shortcuts)
|
94
173
|
end
|
174
|
+
|
175
|
+
# Adds a document to the buffer to be posted to solr (NOTE: does not perform the actual post)
|
176
|
+
#
|
177
|
+
# [<b><tt>docs</tt></b>]
|
178
|
+
# docs must be a DelSolr::Document or array of instances. See DelSolr::Document for how to setup a document
|
179
|
+
def update(docs)
|
180
|
+
self.pending_documents.push(*Array(docs))
|
181
|
+
true
|
182
|
+
end
|
183
|
+
|
184
|
+
# Exactly like <tt>update</tt>, but performs the post immediately. Use <tt>update</tt> if you wish to batch document updates.
|
185
|
+
def update!(docs)
|
186
|
+
update(docs) && post_update!
|
187
|
+
end
|
188
|
+
|
189
|
+
# Calls <tt>update!</tt> on the docs and then <tt>commit!</tt>
|
190
|
+
def update_and_commit!(docs)
|
191
|
+
update!(docs) && commit!
|
192
|
+
end
|
193
|
+
|
194
|
+
# posts the buffer created by <tt>update</tt> to solr
|
195
|
+
def post_update!
|
196
|
+
h,b = post(prepare_update_xml())
|
197
|
+
success?(b)
|
198
|
+
end
|
199
|
+
|
200
|
+
# deletes <tt>unique_id</tt> from the index
|
201
|
+
def delete(unique_id)
|
202
|
+
h,b = post("<delete><id>#{unique_id}</id></delete>")
|
203
|
+
success?(b)
|
204
|
+
end
|
205
|
+
|
206
|
+
# not implemented
|
207
|
+
def delete_by_query(query)
|
208
|
+
raise 'not implemented yet :('
|
209
|
+
end
|
210
|
+
|
211
|
+
# commits all pending adds/deletes
|
212
|
+
def commit!
|
213
|
+
h,b = post("<commit/>")
|
214
|
+
success?(b)
|
215
|
+
end
|
216
|
+
|
217
|
+
# posts the optimize directive to solr
|
218
|
+
def optimize!
|
219
|
+
h,b = post("<optimize/>")
|
220
|
+
success?(b)
|
221
|
+
end
|
222
|
+
|
223
|
+
# accessor to the connection instance
|
224
|
+
def connection
|
225
|
+
@connection ||= begin
|
226
|
+
c = Net::HTTP.new(configuration.server, configuration.port)
|
227
|
+
c.read_timeout = configuration.timeout
|
228
|
+
raise "Failed to connect to #{configuration.server}:#{configuration.port}" if c.nil?
|
229
|
+
c
|
230
|
+
end
|
231
|
+
end
|
95
232
|
|
233
|
+
# clears out the connection so a new one will be created
|
234
|
+
def reset_connection!
|
235
|
+
@connection = nil
|
236
|
+
end
|
237
|
+
|
238
|
+
# returns the array of documents that are waiting to be posted to solr
|
239
|
+
def pending_documents
|
240
|
+
@pending_documents ||= []
|
241
|
+
end
|
242
|
+
|
243
|
+
private
|
244
|
+
|
245
|
+
# returns the update xml buffer
|
246
|
+
def prepare_update_xml
|
247
|
+
r = ["<add>\n"]
|
248
|
+
# copy and clear pending docs
|
249
|
+
working_docs, @pending_documents = @pending_documents, nil
|
250
|
+
working_docs.each { |doc| r << doc.xml }
|
251
|
+
r << "\n</add>\n"
|
252
|
+
r.join # not sure, but I think Array#join is faster than String#<< for large buffers
|
253
|
+
end
|
254
|
+
|
255
|
+
# helper for posting data to solr
|
256
|
+
def post(buffer)
|
257
|
+
connection.post('/solr/update', buffer, {'Content-type' => 'text/xml;charset=utf-8'})
|
258
|
+
end
|
259
|
+
|
260
|
+
def success?(response_body)
|
261
|
+
response_body == '<result status="0"></result>'
|
262
|
+
end
|
263
|
+
|
96
264
|
end
|
97
265
|
end
|
data/test/test_client.rb
CHANGED
@@ -1,9 +1,119 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
require 'rubygems'
|
3
|
+
gem 'mocha', '=0.9.0'
|
4
|
+
require 'mocha'
|
2
5
|
|
3
6
|
class ClientTest < Test::Unit::TestCase
|
4
7
|
|
5
8
|
include Test::Unit::Assertions
|
6
9
|
|
10
|
+
SUCCESS = '<result status="0"></result>'
|
11
|
+
FAILURE = '<result status="1"></result>'
|
12
|
+
CONTENT_TYPE = {'Content-type' => 'text/xml;charset=utf-8'}
|
13
|
+
|
14
|
+
class TestCache
|
15
|
+
def set(k,v,t)
|
16
|
+
@cache ||= {}
|
17
|
+
@cache[k] = v
|
18
|
+
end
|
19
|
+
|
20
|
+
def get(k)
|
21
|
+
@cache ||= {}
|
22
|
+
@cache[k]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
@@response_buffer = %{
|
27
|
+
{
|
28
|
+
'responseHeader'=>{
|
29
|
+
'status'=>0,
|
30
|
+
'QTime'=>151,
|
31
|
+
'params'=>{
|
32
|
+
'wt'=>'ruby',
|
33
|
+
'rows'=>'10',
|
34
|
+
'explainOther'=>'',
|
35
|
+
'start'=>'0',
|
36
|
+
'hl.fl'=>'',
|
37
|
+
'indent'=>'on',
|
38
|
+
'hl'=>'on',
|
39
|
+
'q'=>'index_type:widget',
|
40
|
+
'fl'=>'*,score',
|
41
|
+
'qt'=>'standard',
|
42
|
+
'version'=>'2.2'}},
|
43
|
+
'response'=>{'numFound'=>1522698,'start'=>0,'maxScore'=>1.5583541,'docs'=>[
|
44
|
+
{
|
45
|
+
'index_type'=>'widget',
|
46
|
+
'id'=>1,
|
47
|
+
'unique_id'=>'1_widget',
|
48
|
+
'score'=>1.5583541},
|
49
|
+
{
|
50
|
+
'index_type'=>'widget',
|
51
|
+
'id'=>3,
|
52
|
+
'unique_id'=>'3_widget',
|
53
|
+
'score'=>1.5583541},
|
54
|
+
{
|
55
|
+
'index_type'=>'widget',
|
56
|
+
'id'=>4,
|
57
|
+
'unique_id'=>'4_widget',
|
58
|
+
'score'=>1.5583541},
|
59
|
+
{
|
60
|
+
'index_type'=>'widget',
|
61
|
+
'id'=>5,
|
62
|
+
'unique_id'=>'5_widget',
|
63
|
+
'score'=>1.5583541},
|
64
|
+
{
|
65
|
+
'index_type'=>'widget',
|
66
|
+
'id'=>7,
|
67
|
+
'unique_id'=>'7_widget',
|
68
|
+
'score'=>1.5583541},
|
69
|
+
{
|
70
|
+
'index_type'=>'widget',
|
71
|
+
'id'=>8,
|
72
|
+
'unique_id'=>'8_widget',
|
73
|
+
'score'=>1.5583541},
|
74
|
+
{
|
75
|
+
'index_type'=>'widget',
|
76
|
+
'id'=>9,
|
77
|
+
'unique_id'=>'9_widget',
|
78
|
+
'score'=>1.5583541},
|
79
|
+
{
|
80
|
+
'index_type'=>'widget',
|
81
|
+
'id'=>10,
|
82
|
+
'unique_id'=>'10_widget',
|
83
|
+
'score'=>1.5583541},
|
84
|
+
{
|
85
|
+
'index_type'=>'widget',
|
86
|
+
'id'=>11,
|
87
|
+
'unique_id'=>'11_widget',
|
88
|
+
'score'=>1.5583541},
|
89
|
+
{
|
90
|
+
'index_type'=>'widget',
|
91
|
+
'id'=>12,
|
92
|
+
'unique_id'=>'12_widget',
|
93
|
+
'score'=>1.5583541}]
|
94
|
+
},
|
95
|
+
'facet_counts'=>{
|
96
|
+
'facet_queries'=>{
|
97
|
+
'city_idm:19596' => 392},
|
98
|
+
'facet_fields'=>{
|
99
|
+
'available_b'=>[
|
100
|
+
'false',1328],
|
101
|
+
'onsale_b'=>[
|
102
|
+
'false',1182,
|
103
|
+
'true',174]}},
|
104
|
+
'highlighting'=>{
|
105
|
+
'1_widget'=>{},
|
106
|
+
'3_widget'=>{},
|
107
|
+
'4_widget'=>{},
|
108
|
+
'5_widget'=>{},
|
109
|
+
'7_widget'=>{},
|
110
|
+
'8_widget'=>{},
|
111
|
+
'9_widget'=>{},
|
112
|
+
'10_widget'=>{},
|
113
|
+
'11_widget'=>{},
|
114
|
+
'12_widget'=>{}}}
|
115
|
+
}
|
116
|
+
|
7
117
|
def test_create
|
8
118
|
s = nil
|
9
119
|
assert_nothing_raised do
|
@@ -12,4 +122,96 @@ class ClientTest < Test::Unit::TestCase
|
|
12
122
|
assert(s)
|
13
123
|
end
|
14
124
|
|
125
|
+
def test_commit_success
|
126
|
+
c = setup_client
|
127
|
+
c.connection.expects(:post).once.returns([nil,SUCCESS])
|
128
|
+
assert(c.commit!)
|
129
|
+
end
|
130
|
+
|
131
|
+
def test_commit_failure
|
132
|
+
c = setup_client
|
133
|
+
c.connection.expects(:post).once.returns([nil,FAILURE])
|
134
|
+
assert(!c.commit!)
|
135
|
+
end
|
136
|
+
|
137
|
+
def test_optimize_success
|
138
|
+
c = setup_client
|
139
|
+
c.connection.expects(:post).once.returns([nil,SUCCESS])
|
140
|
+
assert(c.optimize!)
|
141
|
+
end
|
142
|
+
|
143
|
+
def test_optimize_failure
|
144
|
+
c = setup_client
|
145
|
+
c.connection.expects(:post).once.returns([nil,FAILURE])
|
146
|
+
assert(!c.optimize!)
|
147
|
+
end
|
148
|
+
|
149
|
+
def test_update
|
150
|
+
c = setup_client
|
151
|
+
|
152
|
+
doc = DelSolr::Document.new
|
153
|
+
doc.add_field(:id, 123)
|
154
|
+
doc.add_field(:name, 'mp3 player')
|
155
|
+
|
156
|
+
expected_post_data = "<add>\n#{doc.xml}\n</add>\n"
|
157
|
+
|
158
|
+
assert(c.update(doc))
|
159
|
+
assert_equal(1, c.pending_documents.length)
|
160
|
+
|
161
|
+
c.connection.expects(:post).with('/solr/update', expected_post_data, CONTENT_TYPE).returns([nil,SUCCESS])
|
162
|
+
assert(c.post_update!)
|
163
|
+
assert_equal(0, c.pending_documents.length)
|
164
|
+
end
|
165
|
+
|
166
|
+
def test_update!
|
167
|
+
c = setup_client
|
168
|
+
|
169
|
+
doc = DelSolr::Document.new
|
170
|
+
doc.add_field(:id, 123)
|
171
|
+
doc.add_field(:name, 'mp3 player')
|
172
|
+
|
173
|
+
expected_post_data = "<add>\n#{doc.xml}\n</add>\n"
|
174
|
+
|
175
|
+
c.connection.expects(:post).with('/solr/update', expected_post_data, CONTENT_TYPE).returns([nil,SUCCESS])
|
176
|
+
assert(c.update!(doc))
|
177
|
+
assert_equal(0, c.pending_documents.length)
|
178
|
+
end
|
179
|
+
|
180
|
+
def test_query
|
181
|
+
c = setup_client
|
182
|
+
|
183
|
+
mock_query_builder = DelSolr::Client::QueryBuilder
|
184
|
+
mock_query_builder.stubs(:request_string).returns('/solr/select?some_query') # mock the query builder
|
185
|
+
DelSolr::Client::QueryBuilder.stubs(:new).returns(mock_query_builder)
|
186
|
+
c.connection.expects(:get).with(mock_query_builder.request_string).returns([nil, @@response_buffer]) # mock the connection
|
187
|
+
r = c.query('standard', :query => '123')
|
188
|
+
assert(r)
|
189
|
+
assert_equal([1,3,4,5,7,8,9,10,11,12], r.ids.sort)
|
190
|
+
assert(!r.from_cache?, 'should not be from cache')
|
191
|
+
end
|
192
|
+
|
193
|
+
def test_query_from_cache
|
194
|
+
c = setup_client(:cache => TestCache.new)
|
195
|
+
|
196
|
+
mock_query_builder = DelSolr::Client::QueryBuilder
|
197
|
+
mock_query_builder.stubs(:request_string).returns('/solr/select?some_query') # mock the query builder
|
198
|
+
DelSolr::Client::QueryBuilder.stubs(:new).returns(mock_query_builder)
|
199
|
+
c.connection.expects(:get).with(mock_query_builder.request_string).returns([nil, @@response_buffer]) # mock the connection
|
200
|
+
r = c.query('standard', :query => '123', :enable_caching => true)
|
201
|
+
assert(r)
|
202
|
+
assert_equal([1,3,4,5,7,8,9,10,11,12], r.ids.sort)
|
203
|
+
assert(!r.from_cache?, 'should not be from cache')
|
204
|
+
|
205
|
+
r = c.query('standard', :query => '123', :enable_caching => true)
|
206
|
+
assert(r)
|
207
|
+
assert_equal([1,3,4,5,7,8,9,10,11,12], r.ids.sort)
|
208
|
+
assert(r.from_cache?, 'this one should be from the cache')
|
209
|
+
end
|
210
|
+
|
211
|
+
private
|
212
|
+
|
213
|
+
def setup_client(options = {})
|
214
|
+
DelSolr::Client.new({:server => 'localhost', :port => 8983}.merge(options))
|
215
|
+
end
|
216
|
+
|
15
217
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
class DocumentTest < Test::Unit::TestCase
|
4
|
+
|
5
|
+
include Test::Unit::Assertions
|
6
|
+
|
7
|
+
def test_create
|
8
|
+
d = DelSolr::Document.new
|
9
|
+
assert(d)
|
10
|
+
|
11
|
+
d.add_field('person_name', 'John Smith')
|
12
|
+
|
13
|
+
buf = "<doc>\n<field name=\"person_name\">John Smith</field>\n</doc>"
|
14
|
+
|
15
|
+
assert_equal(buf, d.xml)
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_cdata
|
19
|
+
d = DelSolr::Document.new
|
20
|
+
assert(d)
|
21
|
+
|
22
|
+
d.add_field('person_name', 'John Smith', :cdata => true)
|
23
|
+
|
24
|
+
buf = "<doc>\n<field name=\"person_name\"><![CDATA[John Smith]]></field>\n</doc>"
|
25
|
+
|
26
|
+
assert_equal(buf, d.xml)
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
data/test/test_query_builder.rb
CHANGED
@@ -89,7 +89,7 @@ class QueryBuilderTest < Test::Unit::TestCase
|
|
89
89
|
assert_equal(p['q'], 'index_type:books')
|
90
90
|
end
|
91
91
|
|
92
|
-
def
|
92
|
+
def test_facets_001
|
93
93
|
qb = nil
|
94
94
|
opts = {}
|
95
95
|
opts[:query] = "games"
|
@@ -106,7 +106,7 @@ class QueryBuilderTest < Test::Unit::TestCase
|
|
106
106
|
assert_equal(p['f.on_sale_b.facet.limit'], '1')
|
107
107
|
end
|
108
108
|
|
109
|
-
def
|
109
|
+
def test_facets_002
|
110
110
|
qb = nil
|
111
111
|
opts = {}
|
112
112
|
opts[:query] = "games"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: delsolr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben VandenBos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-10-22 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -45,6 +45,7 @@ files:
|
|
45
45
|
- lib/delsolr/extensions.rb
|
46
46
|
- lib/delsolr/query_builder.rb
|
47
47
|
- lib/delsolr/response.rb
|
48
|
+
- lib/delsolr/document.rb
|
48
49
|
- lib/delsolr/version.rb
|
49
50
|
has_rdoc: true
|
50
51
|
homepage: http://delsolr.rubyforge.org
|
@@ -84,4 +85,5 @@ test_files:
|
|
84
85
|
- test/test_response.rb
|
85
86
|
- test/test_query_builder.rb
|
86
87
|
- test/test_helper.rb
|
88
|
+
- test/test_document.rb
|
87
89
|
- test/test_client.rb
|