delsolr 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +2 -0
- data/README.txt +36 -1
- data/lib/delsolr/configuration.rb +3 -2
- data/lib/delsolr/document.rb +82 -0
- data/lib/delsolr/query_builder.rb +7 -5
- data/lib/delsolr/response.rb +28 -33
- data/lib/delsolr/version.rb +1 -1
- data/lib/delsolr.rb +201 -33
- data/test/test_client.rb +202 -0
- data/test/test_document.rb +29 -0
- data/test/test_query_builder.rb +2 -2
- metadata +4 -2
data/Manifest.txt
CHANGED
data/README.txt
CHANGED
@@ -15,6 +15,30 @@ while keeping the interface as ruby-esque as possible.
|
|
15
15
|
|
16
16
|
See http://delsolr.rubyforge.org for more info
|
17
17
|
|
18
|
+
Example:
|
19
|
+
|
20
|
+
c = DelSolr::Client.new(:server => 'solr1', :port => 8983)
|
21
|
+
rsp = c.query('dismax', :query => 'mp3 player',
|
22
|
+
:filters => {:cost => (50..100)},
|
23
|
+
:facets => [{:field => 'brand', :limit => 10},
|
24
|
+
{:query => {:onsale => true, :brand => 'Apple'},
|
25
|
+
:name => 'cheap_apple'}])
|
26
|
+
|
27
|
+
# output total matches
|
28
|
+
puts rsp.total
|
29
|
+
|
30
|
+
# output each id with score
|
31
|
+
rsp.docs.each { |doc| puts "#{doc[:id]} - #{doc[:score]}" }
|
32
|
+
|
33
|
+
# output each value for a facet
|
34
|
+
rsp.facet_field_values('brand').each do |brand|
|
35
|
+
puts "#{brand}: #{rsp.facet_field_count('brand', brand)}"
|
36
|
+
end
|
37
|
+
|
38
|
+
# output a query facet
|
39
|
+
puts "Cheap Apple stuff: #{rsp.facet_query_count_by_name('cheap_apple')}"
|
40
|
+
|
41
|
+
|
18
42
|
== REQUIREMENTS:
|
19
43
|
|
20
44
|
You need Solr installed somewhere so you can query it ;)
|
@@ -23,11 +47,22 @@ You need Solr installed somewhere so you can query it ;)
|
|
23
47
|
|
24
48
|
sudo gem install delsolr
|
25
49
|
|
50
|
+
== TODO:
|
51
|
+
|
52
|
+
* finish unit tests (use mocha to stub out Net::HTTP)
|
53
|
+
* implement delete_by_query
|
54
|
+
* make thread safe
|
55
|
+
* it would be nice to be able to have things like commit/optimize be run in threads on timers periodically
|
56
|
+
* right now a few things need to be locked
|
57
|
+
* connection
|
58
|
+
* pending_documents array
|
59
|
+
|
60
|
+
|
26
61
|
== LICENSE:
|
27
62
|
|
28
63
|
(The MIT License)
|
29
64
|
|
30
|
-
Copyright (c) 2008
|
65
|
+
Copyright (c) 2008 Avvo, INC - http://www.avvo.com
|
31
66
|
|
32
67
|
Permission is hereby granted, free of charge, to any person obtaining
|
33
68
|
a copy of this software and associated documentation files (the
|
@@ -1,11 +1,12 @@
|
|
1
1
|
module DelSolr
|
2
2
|
class Client
|
3
3
|
class Configuration
|
4
|
-
attr_accessor :server, :port
|
4
|
+
attr_accessor :server, :port, :timeout
|
5
5
|
|
6
|
-
def initialize(server, port)
|
6
|
+
def initialize(server, port, timeout = 120)
|
7
7
|
@server = server
|
8
8
|
@port = port.to_i
|
9
|
+
@timeout = timeout || 120
|
9
10
|
end
|
10
11
|
|
11
12
|
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module DelSolr
|
2
|
+
#
|
3
|
+
# DelSolr::Client::Document
|
4
|
+
#
|
5
|
+
# Generally, indexing consists of iterating over your database, creating a huge xml buffer, and
|
6
|
+
# posting it to solr. This wraps up the xml portion and DelSolr::Client wraps up the posting/batching
|
7
|
+
# portions.
|
8
|
+
#
|
9
|
+
# This is what your indexing logic might look like if you're dealing w/ ActiveRecord objects:
|
10
|
+
#
|
11
|
+
# client = DelSolr::Client.new(:server => 'solr1', :port => 8983, :timeout => 500)
|
12
|
+
# models = SomeModel.find(:all)
|
13
|
+
# models.each do |model|
|
14
|
+
# doc = DelSolr::Document.new
|
15
|
+
# doc.add_field('id', model.id)
|
16
|
+
# doc.add_field('name', model.name)
|
17
|
+
# model.tags.each do |tag| # multiple tag fields
|
18
|
+
# doc.add_field('tag', tag.name)
|
19
|
+
# end
|
20
|
+
# client.update(doc) # batch the document update
|
21
|
+
# end
|
22
|
+
# client.post_update! # send the batch to solr
|
23
|
+
# client.commit! # send the commit so solr updates the index
|
24
|
+
#
|
25
|
+
# It's generally a good idea to experiment with different batch size. 500-2000 documents per post
|
26
|
+
# is a good starting point depending on how large your documents are.
|
27
|
+
#
|
28
|
+
# You also may want to just update a single document when it is changed. It might look like this:
|
29
|
+
#
|
30
|
+
# def after_save
|
31
|
+
# doc = DelSolr::Document.new
|
32
|
+
# doc.add_field('id', model.id)
|
33
|
+
# doc.add_field('name', model.name)
|
34
|
+
# $client.update_and_commit!(doc) # post the document and immediately post the commit
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
#
|
38
|
+
class Document
|
39
|
+
|
40
|
+
# [<b><tt>field_name</tt></b>]
|
41
|
+
# is the name of the field in your schema.xml
|
42
|
+
# [<b><tt>value</tt></b>]
|
43
|
+
# is the value of the field you wish to be indexed
|
44
|
+
# [<b><tt>options</tt></b>]
|
45
|
+
# <b><tt>:cdata</tt></b> set to true if you want the value wrapped in a CDATA section
|
46
|
+
#
|
47
|
+
# All other options are passed directly as xml attributes (see the solr documentation on usage)
|
48
|
+
def add_field(field_name, value, options = {})
|
49
|
+
field_buffer << construct_field_tag(field_name, value, options)
|
50
|
+
end
|
51
|
+
|
52
|
+
def xml
|
53
|
+
"<doc>\n" + field_buffer + "</doc>"
|
54
|
+
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# creates xml field for given inputs
|
59
|
+
def construct_field_tag(name, value, options={})
|
60
|
+
options[:name] = name.to_s
|
61
|
+
use_cdata = options.delete(:cdata)
|
62
|
+
opts = []
|
63
|
+
options.each do |k,v|
|
64
|
+
opts.push "#{k}=\"#{v}\""
|
65
|
+
end
|
66
|
+
opts = opts.join(" ")
|
67
|
+
opts = " " + opts if opts
|
68
|
+
|
69
|
+
return "<field#{opts}>#{use_cdata ? cdata(value) : value}</field>\n"
|
70
|
+
end
|
71
|
+
|
72
|
+
def cdata(str)
|
73
|
+
"<![CDATA[#{str}]]>"
|
74
|
+
end
|
75
|
+
|
76
|
+
def field_buffer
|
77
|
+
@buffer ||= ""
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
@@ -6,6 +6,8 @@ module DelSolr
|
|
6
6
|
|
7
7
|
class QueryBuilder
|
8
8
|
|
9
|
+
FL_DEFAULTS = 'id,unique_id,score' # redefine if you really want to change this.
|
10
|
+
|
9
11
|
attr_accessor :query_name, :options
|
10
12
|
|
11
13
|
# ops can basically be straight solr URL params, but it also supports some other formats
|
@@ -19,19 +21,19 @@ module DelSolr
|
|
19
21
|
def request_string
|
20
22
|
@request_string ||= build_request_string
|
21
23
|
end
|
22
|
-
|
24
|
+
|
23
25
|
# returns the query string of the facet query for the given query name (used for resolving counts for given queries)
|
24
26
|
def facet_query_by_name(query_name)
|
25
27
|
name_to_facet_query[query_name]
|
26
28
|
end
|
27
29
|
|
28
30
|
private
|
29
|
-
|
31
|
+
|
30
32
|
def build_request_string()
|
31
33
|
raise "query_name must be set" if query_name.blank?
|
32
|
-
|
34
|
+
|
33
35
|
opts = self.options.dup
|
34
|
-
|
36
|
+
|
35
37
|
# cleanup the nils
|
36
38
|
opts.delete_if {|k,v| v.nil?}
|
37
39
|
|
@@ -39,7 +41,7 @@ module DelSolr
|
|
39
41
|
opts[:q] ||= opts[:query]
|
40
42
|
opts[:rows] ||= opts[:limit] || 10
|
41
43
|
opts[:start] ||= opts[:offset] || 0
|
42
|
-
opts[:fl] ||= opts[:fields] ||
|
44
|
+
opts[:fl] ||= opts[:fields] || FL_DEFAULTS
|
43
45
|
opts[:bq] ||= opts[:boost]
|
44
46
|
opts[:suggestionCount] ||= opts[:suggestion_count]
|
45
47
|
opts[:onlyMorePopular] ||= opts[:only_more_popular]
|
data/lib/delsolr/response.rb
CHANGED
@@ -4,7 +4,7 @@ module DelSolr
|
|
4
4
|
|
5
5
|
class Response
|
6
6
|
|
7
|
-
attr_reader :
|
7
|
+
attr_reader :query_builder
|
8
8
|
|
9
9
|
def initialize(solr_response_buffer, query_builder, options = {})
|
10
10
|
@query_builder = query_builder
|
@@ -26,91 +26,86 @@ module DelSolr
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
-
#
|
29
|
+
# Returns the "raw" ruby hash that is returned by the solr ruby response writer. This is mostly for debugging purposes
|
30
|
+
def raw_response
|
31
|
+
@raw_response
|
32
|
+
end
|
33
|
+
|
34
|
+
# Returns the total number of matches
|
30
35
|
def total
|
31
36
|
@total ||= raw_response['response']['numFound']
|
32
37
|
end
|
33
38
|
|
39
|
+
# Returns true if there are no results
|
34
40
|
def blank?
|
35
|
-
total
|
41
|
+
total.zero?
|
36
42
|
end
|
37
43
|
|
38
44
|
alias_method :empty?, :blank?
|
39
45
|
|
46
|
+
# Returns true if this response was pulled from the cache
|
40
47
|
def from_cache?
|
41
48
|
@from_cache
|
42
49
|
end
|
43
50
|
|
44
|
-
#
|
51
|
+
# Returns the offset that was given in the request
|
45
52
|
def offset
|
46
53
|
@offset ||= raw_response['response']['start']
|
47
54
|
end
|
48
55
|
|
49
|
-
#
|
56
|
+
# Returns the max score of the result set
|
50
57
|
def max_score
|
51
58
|
@max_score ||= raw_response['response']['maxScore'].to_f
|
52
59
|
end
|
53
60
|
|
54
|
-
#
|
55
|
-
def ids
|
56
|
-
@ids ||= docs.collect {|d| d['id']}
|
57
|
-
end
|
58
|
-
|
59
|
-
def unique_ids
|
60
|
-
@unique_ids ||= docs.collect {|d| d['unique_id']}
|
61
|
-
end
|
62
|
-
|
63
|
-
# returns an array of all the docs
|
61
|
+
# Returns an array of all the docs
|
64
62
|
def docs
|
65
63
|
@docs ||= raw_response['response']['docs']
|
66
64
|
end
|
67
65
|
|
68
|
-
#
|
66
|
+
# Helper for displaying a given field (first tries the highlight, then the stored value)
|
69
67
|
def display_for(doc, field)
|
70
68
|
highlights_for(doc['unique_id'], field) || doc[field]
|
71
69
|
end
|
72
70
|
|
73
|
-
#
|
71
|
+
# Returns the highlights for a given id for a given field
|
74
72
|
def highlights_for(unique_id, field)
|
75
73
|
raw_response['highlighting'] ||= {}
|
76
74
|
raw_response['highlighting'][unique_id] ||= {}
|
77
75
|
raw_response['highlighting'][unique_id][field]
|
78
76
|
end
|
79
77
|
|
80
|
-
|
81
|
-
@suggestions ||= raw_response['suggestions']
|
82
|
-
end
|
83
|
-
|
84
|
-
# returns the query time in ms
|
78
|
+
# Returns the query time in ms
|
85
79
|
def qtime
|
86
80
|
@qtime ||= raw_response['responseHeader']['QTime'].to_i
|
87
81
|
end
|
88
82
|
|
89
|
-
#
|
83
|
+
# Returns the status code (0 for success)
|
90
84
|
def status
|
91
85
|
@status ||= raw_response['responseHeader']['status']
|
92
86
|
end
|
93
87
|
|
94
|
-
#
|
88
|
+
# Returns the params hash
|
95
89
|
def params
|
96
90
|
@params ||= raw_response['responseHeader']['params']
|
97
91
|
end
|
98
92
|
|
99
|
-
#
|
93
|
+
# Returns the entire facet hash
|
100
94
|
def facets
|
101
95
|
@facets ||= raw_response['facet_counts'] || {}
|
102
96
|
end
|
103
97
|
|
104
|
-
#
|
98
|
+
# Returns the hash of all the facet_fields (ie: {'instock_b' => ['true', 123, 'false', 20]})
|
105
99
|
def facet_fields
|
106
100
|
@facet_fields ||= facets['facet_fields'] || {}
|
107
101
|
end
|
108
102
|
|
103
|
+
# Returns all of the facet queries
|
109
104
|
def facet_queries
|
110
105
|
@facet_queries ||= facets['facet_queries'] || {}
|
111
106
|
end
|
112
107
|
|
113
|
-
#
|
108
|
+
# Returns a hash of hashes rather than a hash of arrays (ie: {'instock_b' => {'true' => 123, 'false' => 20} })
|
114
109
|
def facet_fields_by_hash
|
115
110
|
@facet_fields_by_hash ||= begin
|
116
111
|
f = {}
|
@@ -128,12 +123,12 @@ module DelSolr
|
|
128
123
|
end
|
129
124
|
end
|
130
125
|
|
131
|
-
#
|
126
|
+
# Returns an array of value/counts for a given field (ie: ['true', 123, 'false', 20])
|
132
127
|
def facet_field(field)
|
133
128
|
facet_fields[field.to_s]
|
134
129
|
end
|
135
130
|
|
136
|
-
#
|
131
|
+
# Returns the array of field values for the given field in the order they were returned from solr
|
137
132
|
def facet_field_values(field)
|
138
133
|
facet_field_values ||= {}
|
139
134
|
facet_field_values[field.to_s] ||= begin
|
@@ -145,23 +140,23 @@ module DelSolr
|
|
145
140
|
end
|
146
141
|
end
|
147
142
|
|
148
|
-
#
|
143
|
+
# Returns a hash of value/counts for a given field (ie: {'true' => 123, 'false' => 20})
|
149
144
|
def facet_field_by_hash(field)
|
150
145
|
facet_fields_by_hash(field.to_s)
|
151
146
|
end
|
152
147
|
|
153
|
-
#
|
148
|
+
# Returns the count for the given field/value pair
|
154
149
|
def facet_field_count(field, value)
|
155
150
|
facet_fields_by_hash[field.to_s][value.to_s] if facet_fields_by_hash[field.to_s]
|
156
151
|
end
|
157
152
|
|
158
|
-
#
|
153
|
+
# Returns the counts for a given facet_query_name
|
159
154
|
def facet_query_count_by_name(facet_query_name)
|
160
155
|
query_string = query_builder.facet_query_by_name(facet_query_name)
|
161
156
|
facet_queries[query_string] if query_string
|
162
157
|
end
|
163
158
|
|
164
|
-
#
|
159
|
+
# Returns the url sent to solr
|
165
160
|
def request_url
|
166
161
|
query_builder.request_string
|
167
162
|
end
|
data/lib/delsolr/version.rb
CHANGED
data/lib/delsolr.rb
CHANGED
@@ -6,6 +6,7 @@
|
|
6
6
|
# see README.txt
|
7
7
|
#
|
8
8
|
|
9
|
+
|
9
10
|
require 'net/http'
|
10
11
|
|
11
12
|
require 'digest/md5'
|
@@ -14,53 +15,136 @@ require File.join(File.dirname(__FILE__), 'delsolr', 'version')
|
|
14
15
|
require File.join(File.dirname(__FILE__), 'delsolr', 'response')
|
15
16
|
require File.join(File.dirname(__FILE__), 'delsolr', 'configuration')
|
16
17
|
require File.join(File.dirname(__FILE__), 'delsolr', 'query_builder')
|
18
|
+
require File.join(File.dirname(__FILE__), 'delsolr', 'document')
|
17
19
|
require File.join(File.dirname(__FILE__), 'delsolr', 'extensions')
|
18
20
|
|
21
|
+
|
19
22
|
module DelSolr
|
20
23
|
|
21
24
|
class Client
|
22
25
|
|
23
|
-
attr_reader :configuration
|
26
|
+
attr_reader :configuration
|
24
27
|
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
28
|
+
#
|
29
|
+
# [<b><tt>:server</tt></b>]
|
30
|
+
# the server you want to connect to
|
31
|
+
#
|
32
|
+
# [<b><tt>:port</tt></b>]
|
33
|
+
# the port you want to connect to
|
34
|
+
#
|
35
|
+
# [<b><tt>:cache</tt></b>]
|
36
|
+
# (optional) a cache instance (any object that supports get and set)
|
37
|
+
#
|
38
|
+
# [<b><tt>:shortcuts</tt></b>]
|
39
|
+
# (optional) a list of values in the doc fields to generate short cuts for (ie: [:scores, :id], you will be able to call <tt>rsp.scores</tt> and have it return an array of scores, likewise for <tt>ids</tt>.) Defaults to [:id, :unique_id, :score]
|
40
|
+
def initialize(options = {})
|
41
|
+
@configuration = DelSolr::Client::Configuration.new(options[:server], options[:port], options[:timeout])
|
42
|
+
@cache = options[:cache]
|
43
|
+
@shortcuts = options[:shortcuts]
|
35
44
|
end
|
36
45
|
|
37
46
|
#
|
38
47
|
#
|
39
|
-
#
|
48
|
+
# <tt>request_handler</tt> - type of query to perform (should match up w/ request handlers defined in solrconfig.xml)
|
49
|
+
#
|
50
|
+
#
|
51
|
+
# options
|
52
|
+
#
|
53
|
+
# [<b><tt>:query</tt></b>]
|
54
|
+
# (required) effectively the 'q' param in the solr URL. The treatment of <tt>:query</tt> depends on the type
|
55
|
+
# of request handler you are using. The supported values are Strings and Hashes. Any valid Lucene query string is acceptable
|
56
|
+
# (ie: :query => "brand:apple" and :query => "apply" are both valid). If given a Hash delsolr will build the appropriate
|
57
|
+
# query string given a hash of fieldnames => values. For instance, the following two queries are effectively the
|
58
|
+
# same. Both will end up passing "brand:apple" as the 'q' param to solr.
|
59
|
+
#
|
60
|
+
# c.query('standard', :query => {:brand => 'apple'})
|
61
|
+
# c.query('standard', :query => "brand:apple")
|
62
|
+
#
|
63
|
+
# [<b><tt>:filters</tt></b>]
|
64
|
+
# (optional)array, string, or hash of additional filters to apply. Filters end up in the 'fq' param in the solr query URL.
|
65
|
+
# The value can be a String, Array of Strings, or Hash. The following are all equivalent.
|
66
|
+
#
|
67
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true})
|
68
|
+
# c.query('standard', :query => 'abc', :filters => "instock:true")
|
69
|
+
# c.query('standard', :query => 'abc', :filters => ["instock:true"])
|
70
|
+
#
|
71
|
+
# as are the following
|
72
|
+
#
|
73
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true, :onsale => true})
|
74
|
+
# c.query('standard', :query => 'abc', :filters => ["instock:true", "onsale:true"])
|
75
|
+
#
|
76
|
+
#
|
77
|
+
# [<b><tt>:facets</tt></b>]
|
78
|
+
# (optional) array of hashes for all the facet params (ie: {:field => 'instock_b', :limit => 15, :mincount => 5})
|
79
|
+
#
|
80
|
+
# <em>Faceting by field...</em>
|
81
|
+
#
|
82
|
+
# c.query('standard', :query => 'abc', :facets => [{:field => 'brand', :limit => 15, :mincount => 5}])
|
83
|
+
#
|
84
|
+
# ...will request counts for the 'brand' field name that have a minimum of 5 documents, returning
|
85
|
+
# a max/limit of 15. The counts for this facet can be pulled from the response like so:
|
86
|
+
#
|
87
|
+
# rsp.facet_field_count('brand', 'Apple') => 17 # returns count as fixnum
|
88
|
+
#
|
89
|
+
# The list of values for this facet can be pulled from the response like so:
|
90
|
+
#
|
91
|
+
# rsp.facet_field_values('brand') => ['Apple', 'Microsoft', 'Dell'] # returns an array of strings
|
92
|
+
#
|
93
|
+
# <em>Faceting by query...</em>
|
94
|
+
#
|
95
|
+
# c.query('standard', :query => 'abc',
|
96
|
+
# :facets => [{:query => {:city => 'seattle', :instock => true},
|
97
|
+
# :name => 'seattle_instock'}])
|
98
|
+
#
|
99
|
+
# ...will request counts for the number of documents where "seattle" matches on the "city" field and "instock" is set to true.
|
100
|
+
# Faceting by query requires you to assign a name to the facet so the counts can easily be fetched from the response. Solr
|
101
|
+
# resolves facet queries to count by the actual facet query string, which can be cumbersome. The delsolr response object maintains
|
102
|
+
# a mapping of query name => query string for you so your application only needs to remember the query name.
|
103
|
+
#
|
104
|
+
# The count for this facet query can be pulled like so:
|
105
|
+
#
|
106
|
+
# rsp.facet_query_count_by_name('seattle_instock').
|
40
107
|
#
|
108
|
+
# [<b><tt>:sorts</tt></b>]
|
109
|
+
# (optional) array or string of sorts in Lucene syntax (<fieldname> <asc/desc>)
|
110
|
+
#
|
111
|
+
# c.query('standard', :query => 'abc', :sort => "product_name asc")
|
112
|
+
#
|
113
|
+
#
|
114
|
+
# [<b><tt>:limit</tt></b>]
|
115
|
+
# (optional) number to return (defaults to 10). (becomes the 'rows' param in the solr URL)
|
116
|
+
#
|
117
|
+
# c.query('standard', :query => 'abc', :limit => 100)
|
41
118
|
#
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
50
|
-
# :
|
51
|
-
# :
|
52
|
-
# :
|
53
|
-
#
|
119
|
+
# [<b><tt>:offset</tt></b>]
|
120
|
+
# (optional) offset (defaults to 0, becomes the 'start' param in the solr URL)
|
121
|
+
#
|
122
|
+
# c.query('standard', :query => 'abc', :offset => 40)
|
123
|
+
#
|
124
|
+
# [<b><tt>:enable_caching</tt></b>]
|
125
|
+
# (optional) switch to control whether or not to use the cache (for fetching or setting) for the current query. Only works if a cache store was passed to the constructor.
|
126
|
+
#
|
127
|
+
# c = DelSolr::Client.new(:server => 'solr1', :port => 8983, :cache => SomeCacheStore.new)
|
128
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true)
|
129
|
+
# c.query('standard', :query => 'abc', :filters => {:instock => true}, :enable_caching => true) # this one should hit the cache
|
130
|
+
#
|
131
|
+
# Cache keys are created from MD5's of the solr URL that is generated.
|
132
|
+
#
|
133
|
+
# [<b><tt>:boost</tt></b>] becomes the 'bq' param which is used for query time boosting
|
134
|
+
# [<b><tt>:fields</tt></b>] becomes the 'fl' param which decides which fields to return. Defaults to 'id,unique_id,score'
|
135
|
+
#
|
136
|
+
# NOTE: Any unrecognized options will be passed along as URL params in the solr request URL. This allows you to access solr features
|
137
|
+
# which are unsupported by DelSolr.
|
54
138
|
#
|
55
139
|
# Returns a DelSolr::Client::Response instance
|
56
|
-
def query(
|
140
|
+
def query(request_handler, opts = {})
|
57
141
|
|
58
|
-
raise "
|
142
|
+
raise "request_handler must be supplied" if request_handler.blank?
|
59
143
|
|
60
144
|
enable_caching = opts.delete(:enable_caching) && !@cache.nil?
|
61
145
|
ttl = opts.delete(:ttl) || 1.hours
|
62
146
|
|
63
|
-
query_builder = DelSolr::Client::QueryBuilder.new(
|
147
|
+
query_builder = DelSolr::Client::QueryBuilder.new(request_handler, opts)
|
64
148
|
|
65
149
|
# it's important that the QueryBuilder returns strings in a deterministic fashion
|
66
150
|
# so that the cache keys will match for the same query.
|
@@ -74,12 +158,7 @@ module DelSolr
|
|
74
158
|
end
|
75
159
|
|
76
160
|
if body.blank? # cache miss (or wasn't enabled)
|
77
|
-
|
78
|
-
# only bother to create the connection if we know we failed to hit the cache
|
79
|
-
@connection ||= Net::HTTP.new(configuration.server, configuration.port)
|
80
|
-
raise "Failed to connect to #{configuration.server}:#{configuration.port}" if @connection.nil?
|
81
|
-
|
82
|
-
header, body = @connection.get(query_builder.request_string)
|
161
|
+
header, body = connection.get(query_builder.request_string)
|
83
162
|
|
84
163
|
# add to the cache if caching
|
85
164
|
if enable_caching
|
@@ -92,6 +171,95 @@ module DelSolr
|
|
92
171
|
|
93
172
|
DelSolr::Client::Response.new(body, query_builder, :from_cache => from_cache, :shortcuts => @shortcuts)
|
94
173
|
end
|
174
|
+
|
175
|
+
# Adds a document to the buffer to be posted to solr (NOTE: does not perform the actual post)
|
176
|
+
#
|
177
|
+
# [<b><tt>docs</tt></b>]
|
178
|
+
# docs must be a DelSolr::Document or array of instances. See DelSolr::Document for how to setup a document
|
179
|
+
def update(docs)
|
180
|
+
self.pending_documents.push(*Array(docs))
|
181
|
+
true
|
182
|
+
end
|
183
|
+
|
184
|
+
# Exactly like <tt>update</tt>, but performs the post immediately. Use <tt>update</tt> if you wish to batch document updates.
|
185
|
+
def update!(docs)
|
186
|
+
update(docs) && post_update!
|
187
|
+
end
|
188
|
+
|
189
|
+
# Calls <tt>update!</tt> on the docs and then <tt>commit!</tt>
|
190
|
+
def update_and_commit!(docs)
|
191
|
+
update!(docs) && commit!
|
192
|
+
end
|
193
|
+
|
194
|
+
# posts the buffer created by <tt>update</tt> to solr
|
195
|
+
def post_update!
|
196
|
+
h,b = post(prepare_update_xml())
|
197
|
+
success?(b)
|
198
|
+
end
|
199
|
+
|
200
|
+
# deletes <tt>unique_id</tt> from the index
|
201
|
+
def delete(unique_id)
|
202
|
+
h,b = post("<delete><id>#{unique_id}</id></delete>")
|
203
|
+
success?(b)
|
204
|
+
end
|
205
|
+
|
206
|
+
# not implemented
|
207
|
+
def delete_by_query(query)
|
208
|
+
raise 'not implemented yet :('
|
209
|
+
end
|
210
|
+
|
211
|
+
# commits all pending adds/deletes
|
212
|
+
def commit!
|
213
|
+
h,b = post("<commit/>")
|
214
|
+
success?(b)
|
215
|
+
end
|
216
|
+
|
217
|
+
# posts the optimize directive to solr
|
218
|
+
def optimize!
|
219
|
+
h,b = post("<optimize/>")
|
220
|
+
success?(b)
|
221
|
+
end
|
222
|
+
|
223
|
+
# accessor to the connection instance
|
224
|
+
def connection
|
225
|
+
@connection ||= begin
|
226
|
+
c = Net::HTTP.new(configuration.server, configuration.port)
|
227
|
+
c.read_timeout = configuration.timeout
|
228
|
+
raise "Failed to connect to #{configuration.server}:#{configuration.port}" if c.nil?
|
229
|
+
c
|
230
|
+
end
|
231
|
+
end
|
95
232
|
|
233
|
+
# clears out the connection so a new one will be created
|
234
|
+
def reset_connection!
|
235
|
+
@connection = nil
|
236
|
+
end
|
237
|
+
|
238
|
+
# returns the array of documents that are waiting to be posted to solr
|
239
|
+
def pending_documents
|
240
|
+
@pending_documents ||= []
|
241
|
+
end
|
242
|
+
|
243
|
+
private
|
244
|
+
|
245
|
+
# returns the update xml buffer
|
246
|
+
def prepare_update_xml
|
247
|
+
r = ["<add>\n"]
|
248
|
+
# copy and clear pending docs
|
249
|
+
working_docs, @pending_documents = @pending_documents, nil
|
250
|
+
working_docs.each { |doc| r << doc.xml }
|
251
|
+
r << "\n</add>\n"
|
252
|
+
r.join # not sure, but I think Array#join is faster then String#<< for large buffers
|
253
|
+
end
|
254
|
+
|
255
|
+
# helper for posting data to solr
|
256
|
+
def post(buffer)
|
257
|
+
connection.post('/solr/update', buffer, {'Content-type' => 'text/xml;charset=utf-8'})
|
258
|
+
end
|
259
|
+
|
260
|
+
def success?(response_body)
|
261
|
+
response_body == '<result status="0"></result>'
|
262
|
+
end
|
263
|
+
|
96
264
|
end
|
97
265
|
end
|
data/test/test_client.rb
CHANGED
@@ -1,9 +1,119 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
require 'rubygems'
|
3
|
+
gem 'mocha', '=0.9.0'
|
4
|
+
require 'mocha'
|
2
5
|
|
3
6
|
class ClientTest < Test::Unit::TestCase
|
4
7
|
|
5
8
|
include Test::Unit::Assertions
|
6
9
|
|
10
|
+
SUCCESS = '<result status="0"></result>'
|
11
|
+
FAILURE = '<result status="1"></result>'
|
12
|
+
CONTENT_TYPE = {'Content-type' => 'text/xml;charset=utf-8'}
|
13
|
+
|
14
|
+
class TestCache
|
15
|
+
def set(k,v,t)
|
16
|
+
@cache ||= {}
|
17
|
+
@cache[k] = v
|
18
|
+
end
|
19
|
+
|
20
|
+
def get(k)
|
21
|
+
@cache ||= {}
|
22
|
+
@cache[k]
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
@@response_buffer = %{
|
27
|
+
{
|
28
|
+
'responseHeader'=>{
|
29
|
+
'status'=>0,
|
30
|
+
'QTime'=>151,
|
31
|
+
'params'=>{
|
32
|
+
'wt'=>'ruby',
|
33
|
+
'rows'=>'10',
|
34
|
+
'explainOther'=>'',
|
35
|
+
'start'=>'0',
|
36
|
+
'hl.fl'=>'',
|
37
|
+
'indent'=>'on',
|
38
|
+
'hl'=>'on',
|
39
|
+
'q'=>'index_type:widget',
|
40
|
+
'fl'=>'*,score',
|
41
|
+
'qt'=>'standard',
|
42
|
+
'version'=>'2.2'}},
|
43
|
+
'response'=>{'numFound'=>1522698,'start'=>0,'maxScore'=>1.5583541,'docs'=>[
|
44
|
+
{
|
45
|
+
'index_type'=>'widget',
|
46
|
+
'id'=>1,
|
47
|
+
'unique_id'=>'1_widget',
|
48
|
+
'score'=>1.5583541},
|
49
|
+
{
|
50
|
+
'index_type'=>'widget',
|
51
|
+
'id'=>3,
|
52
|
+
'unique_id'=>'3_widget',
|
53
|
+
'score'=>1.5583541},
|
54
|
+
{
|
55
|
+
'index_type'=>'widget',
|
56
|
+
'id'=>4,
|
57
|
+
'unique_id'=>'4_widget',
|
58
|
+
'score'=>1.5583541},
|
59
|
+
{
|
60
|
+
'index_type'=>'widget',
|
61
|
+
'id'=>5,
|
62
|
+
'unique_id'=>'5_widget',
|
63
|
+
'score'=>1.5583541},
|
64
|
+
{
|
65
|
+
'index_type'=>'widget',
|
66
|
+
'id'=>7,
|
67
|
+
'unique_id'=>'7_widget',
|
68
|
+
'score'=>1.5583541},
|
69
|
+
{
|
70
|
+
'index_type'=>'widget',
|
71
|
+
'id'=>8,
|
72
|
+
'unique_id'=>'8_widget',
|
73
|
+
'score'=>1.5583541},
|
74
|
+
{
|
75
|
+
'index_type'=>'widget',
|
76
|
+
'id'=>9,
|
77
|
+
'unique_id'=>'9_widget',
|
78
|
+
'score'=>1.5583541},
|
79
|
+
{
|
80
|
+
'index_type'=>'widget',
|
81
|
+
'id'=>10,
|
82
|
+
'unique_id'=>'10_widget',
|
83
|
+
'score'=>1.5583541},
|
84
|
+
{
|
85
|
+
'index_type'=>'widget',
|
86
|
+
'id'=>11,
|
87
|
+
'unique_id'=>'11_widget',
|
88
|
+
'score'=>1.5583541},
|
89
|
+
{
|
90
|
+
'index_type'=>'widget',
|
91
|
+
'id'=>12,
|
92
|
+
'unique_id'=>'12_widget',
|
93
|
+
'score'=>1.5583541}]
|
94
|
+
},
|
95
|
+
'facet_counts'=>{
|
96
|
+
'facet_queries'=>{
|
97
|
+
'city_idm:19596' => 392},
|
98
|
+
'facet_fields'=>{
|
99
|
+
'available_b'=>[
|
100
|
+
'false',1328],
|
101
|
+
'onsale_b'=>[
|
102
|
+
'false',1182,
|
103
|
+
'true',174]}},
|
104
|
+
'highlighting'=>{
|
105
|
+
'1_widget'=>{},
|
106
|
+
'3_widget'=>{},
|
107
|
+
'4_widget'=>{},
|
108
|
+
'5_widget'=>{},
|
109
|
+
'7_widget'=>{},
|
110
|
+
'8_widget'=>{},
|
111
|
+
'9_widget'=>{},
|
112
|
+
'10_widget'=>{},
|
113
|
+
'11_widget'=>{},
|
114
|
+
'12_widget'=>{}}}
|
115
|
+
}
|
116
|
+
|
7
117
|
def test_create
|
8
118
|
s = nil
|
9
119
|
assert_nothing_raised do
|
@@ -12,4 +122,96 @@ class ClientTest < Test::Unit::TestCase
|
|
12
122
|
assert(s)
|
13
123
|
end
|
14
124
|
|
125
|
+
def test_commit_success
|
126
|
+
c = setup_client
|
127
|
+
c.connection.expects(:post).once.returns([nil,SUCCESS])
|
128
|
+
assert(c.commit!)
|
129
|
+
end
|
130
|
+
|
131
|
+
def test_commit_failure
|
132
|
+
c = setup_client
|
133
|
+
c.connection.expects(:post).once.returns([nil,FAILURE])
|
134
|
+
assert(!c.commit!)
|
135
|
+
end
|
136
|
+
|
137
|
+
def test_optimize_success
|
138
|
+
c = setup_client
|
139
|
+
c.connection.expects(:post).once.returns([nil,SUCCESS])
|
140
|
+
assert(c.optimize!)
|
141
|
+
end
|
142
|
+
|
143
|
+
def test_optimize_failure
|
144
|
+
c = setup_client
|
145
|
+
c.connection.expects(:post).once.returns([nil,FAILURE])
|
146
|
+
assert(!c.optimize!)
|
147
|
+
end
|
148
|
+
|
149
|
+
def test_update
|
150
|
+
c = setup_client
|
151
|
+
|
152
|
+
doc = DelSolr::Document.new
|
153
|
+
doc.add_field(:id, 123)
|
154
|
+
doc.add_field(:name, 'mp3 player')
|
155
|
+
|
156
|
+
expected_post_data = "<add>\n#{doc.xml}\n</add>\n"
|
157
|
+
|
158
|
+
assert(c.update(doc))
|
159
|
+
assert_equal(1, c.pending_documents.length)
|
160
|
+
|
161
|
+
c.connection.expects(:post).with('/solr/update', expected_post_data, CONTENT_TYPE).returns([nil,SUCCESS])
|
162
|
+
assert(c.post_update!)
|
163
|
+
assert_equal(0, c.pending_documents.length)
|
164
|
+
end
|
165
|
+
|
166
|
+
def test_update!
|
167
|
+
c = setup_client
|
168
|
+
|
169
|
+
doc = DelSolr::Document.new
|
170
|
+
doc.add_field(:id, 123)
|
171
|
+
doc.add_field(:name, 'mp3 player')
|
172
|
+
|
173
|
+
expected_post_data = "<add>\n#{doc.xml}\n</add>\n"
|
174
|
+
|
175
|
+
c.connection.expects(:post).with('/solr/update', expected_post_data, CONTENT_TYPE).returns([nil,SUCCESS])
|
176
|
+
assert(c.update!(doc))
|
177
|
+
assert_equal(0, c.pending_documents.length)
|
178
|
+
end
|
179
|
+
|
180
|
+
def test_query
|
181
|
+
c = setup_client
|
182
|
+
|
183
|
+
mock_query_builder = DelSolr::Client::QueryBuilder
|
184
|
+
mock_query_builder.stubs(:request_string).returns('/solr/select?some_query') # mock the query builder
|
185
|
+
DelSolr::Client::QueryBuilder.stubs(:new).returns(mock_query_builder)
|
186
|
+
c.connection.expects(:get).with(mock_query_builder.request_string).returns([nil, @@response_buffer]) # mock the connection
|
187
|
+
r = c.query('standard', :query => '123')
|
188
|
+
assert(r)
|
189
|
+
assert_equal([1,3,4,5,7,8,9,10,11,12], r.ids.sort)
|
190
|
+
assert(!r.from_cache?, 'should not be from cache')
|
191
|
+
end
|
192
|
+
|
193
|
+
def test_query_from_cache
|
194
|
+
c = setup_client(:cache => TestCache.new)
|
195
|
+
|
196
|
+
mock_query_builder = DelSolr::Client::QueryBuilder
|
197
|
+
mock_query_builder.stubs(:request_string).returns('/solr/select?some_query') # mock the query builder
|
198
|
+
DelSolr::Client::QueryBuilder.stubs(:new).returns(mock_query_builder)
|
199
|
+
c.connection.expects(:get).with(mock_query_builder.request_string).returns([nil, @@response_buffer]) # mock the connection
|
200
|
+
r = c.query('standard', :query => '123', :enable_caching => true)
|
201
|
+
assert(r)
|
202
|
+
assert_equal([1,3,4,5,7,8,9,10,11,12], r.ids.sort)
|
203
|
+
assert(!r.from_cache?, 'should not be from cache')
|
204
|
+
|
205
|
+
r = c.query('standard', :query => '123', :enable_caching => true)
|
206
|
+
assert(r)
|
207
|
+
assert_equal([1,3,4,5,7,8,9,10,11,12], r.ids.sort)
|
208
|
+
assert(r.from_cache?, 'this one should be from the cache')
|
209
|
+
end
|
210
|
+
|
211
|
+
private
|
212
|
+
|
213
|
+
def setup_client(options = {})
|
214
|
+
DelSolr::Client.new({:server => 'localhost', :port => 8983}.merge(options))
|
215
|
+
end
|
216
|
+
|
15
217
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
class DocumentTest < Test::Unit::TestCase
|
4
|
+
|
5
|
+
include Test::Unit::Assertions
|
6
|
+
|
7
|
+
def test_create
|
8
|
+
d = DelSolr::Document.new
|
9
|
+
assert(d)
|
10
|
+
|
11
|
+
d.add_field('person_name', 'John Smith')
|
12
|
+
|
13
|
+
buf = "<doc>\n<field name=\"person_name\">John Smith</field>\n</doc>"
|
14
|
+
|
15
|
+
assert_equal(buf, d.xml)
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_cdata
|
19
|
+
d = DelSolr::Document.new
|
20
|
+
assert(d)
|
21
|
+
|
22
|
+
d.add_field('person_name', 'John Smith', :cdata => true)
|
23
|
+
|
24
|
+
buf = "<doc>\n<field name=\"person_name\"><![CDATA[John Smith]]></field>\n</doc>"
|
25
|
+
|
26
|
+
assert_equal(buf, d.xml)
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
data/test/test_query_builder.rb
CHANGED
@@ -89,7 +89,7 @@ class QueryBuilderTest < Test::Unit::TestCase
|
|
89
89
|
assert_equal(p['q'], 'index_type:books')
|
90
90
|
end
|
91
91
|
|
92
|
-
def
|
92
|
+
def test_facets_001
|
93
93
|
qb = nil
|
94
94
|
opts = {}
|
95
95
|
opts[:query] = "games"
|
@@ -106,7 +106,7 @@ class QueryBuilderTest < Test::Unit::TestCase
|
|
106
106
|
assert_equal(p['f.on_sale_b.facet.limit'], '1')
|
107
107
|
end
|
108
108
|
|
109
|
-
def
|
109
|
+
def test_facets_002
|
110
110
|
qb = nil
|
111
111
|
opts = {}
|
112
112
|
opts[:query] = "games"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: delsolr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben VandenBos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2008-
|
12
|
+
date: 2008-10-22 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -45,6 +45,7 @@ files:
|
|
45
45
|
- lib/delsolr/extensions.rb
|
46
46
|
- lib/delsolr/query_builder.rb
|
47
47
|
- lib/delsolr/response.rb
|
48
|
+
- lib/delsolr/document.rb
|
48
49
|
- lib/delsolr/version.rb
|
49
50
|
has_rdoc: true
|
50
51
|
homepage: http://delsolr.rubyforge.org
|
@@ -84,4 +85,5 @@ test_files:
|
|
84
85
|
- test/test_response.rb
|
85
86
|
- test/test_query_builder.rb
|
86
87
|
- test/test_helper.rb
|
88
|
+
- test/test_document.rb
|
87
89
|
- test/test_client.rb
|