jruby_streaming_update_solr_server 0.0.0 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +20 -11
- data/VERSION +1 -1
- data/jars/apache-solr-solrj-1.5-dev.jar +0 -0
- data/jars/commons-codec-1.3.jar +0 -0
- data/jars/commons-httpclient-3.1.jar +0 -0
- data/jars/commons-logging-1.1.1.jar +0 -0
- data/jars/slf4j-api-1.5.5.jar +0 -0
- data/jars/slf4j-jdk14-1.5.5 2.jar +0 -0
- data/lib/jruby_streaming_update_solr_server.rb +147 -9
- data/test/test_jruby_streaming_update_solr_server.rb +34 -2
- metadata +8 -2
data/README.rdoc
CHANGED
@@ -1,16 +1,25 @@
|
|
1
1
|
= jruby_streaming_update_solr_server
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
3
|
+
Some syntactic sugar on top of the StreamingUpdateSolrServer as provided by the Apache
|
4
|
+
Solr project, along with its most common argument, the SolrInputDocument.
|
5
|
+
|
6
|
+
Documentation can be generated with yard via the 'rake yard' action.
|
7
|
+
|
8
|
+
See the class files for more information, documentation, and examples.
|
9
|
+
|
10
|
+
== What is this again?
|
11
|
+
|
12
|
+
For users of Solr, there is a java class called StreamingUpdateSolrServer that allows you to easily
|
13
|
+
(and in a threaded manner) add documents (of class SolrInputDocument)
|
14
|
+
to a queue and have them automatically sent along to Solr by a user-configurable number of threads.
|
15
|
+
|
16
|
+
These classes open up those Java classes to add some JRuby sugar and make it easy for you to
|
17
|
+
construct Solr documents and send them to Solr all from within the comfort of the JRuby environment.
|
18
|
+
|
19
|
+
== WARNING: Program hangs on for 60 seconds!
|
20
|
+
|
21
|
+
*WARNING*: After your program ends, the StreamingUpdateSolrServer object hangs on for a full minute, presumably
|
22
|
+
the timeout of the underlying http client object. I haven't figured out how to change this yet.
|
14
23
|
|
15
24
|
== Copyright
|
16
25
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.1.0
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,9 +1,88 @@
|
|
1
|
+
if not defined? JRUBY_VERSION
|
2
|
+
raise "jruby streaming update solr server only runs under jruby"
|
3
|
+
end
|
4
|
+
|
5
|
+
# Load .jar files locally if they haven't already been pulled in.
|
6
|
+
begin
|
7
|
+
include_class Java::OrgApacheSolrClientSolrjImpl::StreamingUpdateSolrServer
|
8
|
+
include_class Java::OrgApacheSolrCommon::SolrInputDocument
|
9
|
+
rescue NameError => e
|
10
|
+
jardir = File.join(File.dirname(__FILE__), '..', 'jars')
|
11
|
+
Dir.glob("#{jardir}/*.jar") do |x|
|
12
|
+
require x
|
13
|
+
end
|
14
|
+
retry
|
15
|
+
end
|
1
16
|
|
2
|
-
|
3
|
-
|
17
|
+
# Sugar on top of the org.apache.solr.client.solrj.impl.StreamingUpdateSolrServer
|
18
|
+
#
|
19
|
+
# Note that several important methods, new and commit, are direct from the java and hence
|
20
|
+
# not represented here where I'm just opening up the class to add some sugar. Full documentation
|
21
|
+
# for the raw java methods is available at
|
22
|
+
# http://lucene.apache.org/solr/api/org/apache/solr/client/solrj/impl/StreamingUpdateSolrServer.html
|
23
|
+
#
|
24
|
+
# A quick look at important java methods you can call:
|
25
|
+
#
|
26
|
+
# <b>suss = StreamingUpdateSolrServer.new(solrURL, queueSize, numberOfThreads)</b>
|
27
|
+
# The constructor.
|
28
|
+
#
|
29
|
+
# [String] solrURL The URL to your solr instance (e.g., http://solr-machine:port/solr)
|
30
|
+
# [Integer] queueSize The size of the queue from which consumer threads will pull
|
31
|
+
# documents ready to be added to Solr and actually do the sending.
|
32
|
+
# [Integer] numberOfThreads The number of consumer threads to do the sending-to-Solr
|
33
|
+
#
|
34
|
+
# <b>suss.commit</b>
|
35
|
+
# Send the commit to the solr server
|
36
|
+
#
|
37
|
+
# <b>suss.optimize</b>
|
38
|
+
# Send the optimize command to the Solr server
|
39
|
+
#
|
40
|
+
# <b>suss.deleteById(id)</b>
|
41
|
+
#
|
42
|
+
# <b>suss.deleteById([id1, id2, id3, ...])</b>
|
43
|
+
# Delete the given ID or IDs
|
44
|
+
#
|
45
|
+
# <b>suss.deleteByQuery(query)</b>
|
46
|
+
# Delete everything that matches +query+
|
47
|
+
# [String] query A valid solr query. Everything that matches will be deleted. So, you can ditch
|
48
|
+
# it all by sending, e.g., '*:*'
|
49
|
+
#
|
50
|
+
# @author Bill Dueber
|
4
51
|
|
5
52
|
class StreamingUpdateSolrServer
|
6
|
-
|
53
|
+
|
54
|
+
# Hang onto the java #add for internal use
|
55
|
+
alias_method :sussadd, :add
|
56
|
+
|
57
|
+
# Add a document to the SUSS
|
58
|
+
# @param [SolrInputDocument, #each_pair] doc The SolrInputDocument or hash (or hash-like object
|
59
|
+
# that responds to #each_pair) to add. The latter must be of the form solrfield => value or
|
60
|
+
# solrfield => [list, of, values]. The keys can be either symbols or strings.
|
61
|
+
#
|
62
|
+
# @example Create and add a SolrInputDocument
|
63
|
+
# url = 'http://solrmachine:port/solr' # URL to solr
|
64
|
+
# queuesize = 10 # Size of producer cache
|
65
|
+
# threads = 2 # Number of consumer threads to push docs from queue to solr
|
66
|
+
#
|
67
|
+
# suss = StreamingUpdateSolrServer.new(url,queuesize,threads)
|
68
|
+
#
|
69
|
+
# doc = SolrInputDocument.new
|
70
|
+
# doc << ['title', 'This is the title']
|
71
|
+
# doc << ['id', 1]
|
72
|
+
# suss.add doc # or suss << doc
|
73
|
+
# # repeat as desired
|
74
|
+
# suss.commit
|
75
|
+
#
|
76
|
+
# @example Create and add as a hash
|
77
|
+
# # The "hash" just needs to be an object that responds to each_pair with field,value(s)
|
78
|
+
# suss = StreamingUpdateSolrServer.new(url,queuesize,threads)
|
79
|
+
# doc = {}
|
80
|
+
# doc['title'] = 'This is the title'
|
81
|
+
# doc[:id] = 1 # Can also take symbols instead of strings if you like
|
82
|
+
# doc[:author] = ['Bill', 'Mike']
|
83
|
+
# suss << doc
|
84
|
+
# # repeat as desired
|
85
|
+
# suss.commit
|
7
86
|
|
8
87
|
def add doc
|
9
88
|
if doc.is_a? org.apache.solr.common.SolrInputDocument
|
@@ -13,9 +92,9 @@ class StreamingUpdateSolrServer
|
|
13
92
|
doc.each_pair do |f,v|
|
14
93
|
newdoc << [f,v]
|
15
94
|
end
|
16
|
-
|
95
|
+
sussadd newdoc
|
17
96
|
else
|
18
|
-
|
97
|
+
raise ArgumentError "Need to pass either an org.apache.solr.common.SolrInputDocument or a hash"
|
19
98
|
end
|
20
99
|
end
|
21
100
|
|
@@ -23,8 +102,25 @@ class StreamingUpdateSolrServer
|
|
23
102
|
|
24
103
|
end
|
25
104
|
|
105
|
+
# Add some sugar to the SolrInputDocument
|
106
|
+
#
|
107
|
+
# @author Bill Dueber
|
26
108
|
|
27
109
|
class SolrInputDocument
|
110
|
+
|
111
|
+
# Add a field and value(s) to the document. This is strictly additive; nothing is replaced or removed
|
112
|
+
#
|
113
|
+
# @param [Array<Symbol, String>] fv A two-element array of the form [field, value] or [field, [value1, value2, ...]]
|
114
|
+
# The field name (i.e., fv[0]) is the Solr field name as specified in schema.xml and
|
115
|
+
# must be either a string or a symbol
|
116
|
+
# @return [Array<String>] the list of current values for the field in fv[0]
|
117
|
+
#
|
118
|
+
# @example Add some fields
|
119
|
+
# doc = SolrInputDocument.new
|
120
|
+
# doc << ['title', 'Mein Kopf'] #=> ['Mein Kopf']
|
121
|
+
# doc << ['title', 'My Head!'] #=> ['Mein Kopf', 'My Head!']
|
122
|
+
# doc << ['author', ['Bill', 'Mike', 'Molly']] #=> ['Bill', 'Mike', 'Molly']
|
123
|
+
|
28
124
|
def << fv
|
29
125
|
field = fv[0]
|
30
126
|
value = fv[1]
|
@@ -41,6 +137,14 @@ class SolrInputDocument
|
|
41
137
|
self[field]
|
42
138
|
end
|
43
139
|
|
140
|
+
|
141
|
+
# Get a list of the currently-set values for the passed field
|
142
|
+
#
|
143
|
+
# Note that this will always return either nil (not found) or an array, even of one element
|
144
|
+
#
|
145
|
+
# @param [String, Symbol] field The field whose values you want (as String or Symbol)
|
146
|
+
# @return [Array<String>] An array of values (or nil on not found)
|
147
|
+
#
|
44
148
|
def [] field
|
45
149
|
if field.is_a?(Symbol)
|
46
150
|
field = field.to_s
|
@@ -52,10 +156,25 @@ class SolrInputDocument
|
|
52
156
|
if v.class == Java::JavaUtil::ArrayList
|
53
157
|
return v.to_a
|
54
158
|
else
|
55
|
-
return v
|
159
|
+
return [v]
|
56
160
|
end
|
57
161
|
end
|
58
162
|
|
163
|
+
# Set the value(s) for the given field, destroying any values that were already in there
|
164
|
+
#
|
165
|
+
# Note that this is destructive; see #<< to add multiple values to a field
|
166
|
+
#
|
167
|
+
# @param [String, Symbol] field The solr field you're setting the value of
|
168
|
+
# @param [String, Array<String>] value The value or array of values to set
|
169
|
+
# @return [Array<String>] The list of values (i.e., either +value+ or +[value]+)
|
170
|
+
#
|
171
|
+
# @example
|
172
|
+
# doc = SolrInputDocument.new
|
173
|
+
# doc[:id] = 1 #=> [1]
|
174
|
+
# doc[:author] = 'Mike' #=> ['Mike']
|
175
|
+
# doc[:author] = 'Bill' #=> ['Bill']
|
176
|
+
# doc[:author] #=> ['Bill']
|
177
|
+
|
59
178
|
def []= field, value
|
60
179
|
if field.is_a?(Symbol)
|
61
180
|
field = field.to_s
|
@@ -64,8 +183,29 @@ class SolrInputDocument
|
|
64
183
|
self[field]
|
65
184
|
end
|
66
185
|
|
186
|
+
|
187
|
+
# Add keys and values from a hash or hash-like object to the document without removing any
|
188
|
+
# already-added values.
|
189
|
+
#
|
190
|
+
# @param [#each_pair] h A set of field=>value pairs, probably in a Hash. Can be either
|
191
|
+
# field=>value or field=>[list,of,values]
|
192
|
+
#
|
193
|
+
# @example Merge a hash into an existing document
|
194
|
+
# doc = SolrInputDocument.new
|
195
|
+
# doc << [:author, 'Bill']
|
196
|
+
# h = {}
|
197
|
+
# h['author'] = 'Mike'
|
198
|
+
# h['id'] = 1
|
199
|
+
# h[:copies] = ['Grad reference', 'Long-term storage']
|
200
|
+
# doc.merge! h
|
201
|
+
# doc[:id] #=> [1]
|
202
|
+
# doc[:author] #=> ['Bill', 'Mike']
|
203
|
+
# doc[:copies] #=> ['Grad reference', 'Long-term storage']
|
204
|
+
|
67
205
|
def merge! h
|
68
|
-
|
206
|
+
unless h.respond_to? :each_pair
|
207
|
+
raise ArgumentError, "Argument must respond to #each_pair"
|
208
|
+
end
|
69
209
|
h.each_pair do |k,v|
|
70
210
|
self << [k,v]
|
71
211
|
end
|
@@ -73,5 +213,3 @@ class SolrInputDocument
|
|
73
213
|
end
|
74
214
|
|
75
215
|
|
76
|
-
|
77
|
-
|
@@ -1,7 +1,39 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
3
|
class TestJrubyStreamingUpdateSolrServer < Test::Unit::TestCase
|
4
|
-
should "
|
5
|
-
|
4
|
+
should "Write some tests, but don't know how to mock up a solr server" do
|
5
|
+
assert_equal 1, 1
|
6
6
|
end
|
7
|
+
|
8
|
+
should "Report nil for a document that doesn't include a field" do
|
9
|
+
doc = SolrInputDocument.new
|
10
|
+
assert_equal nil, doc[:notinthere]
|
11
|
+
end
|
12
|
+
|
13
|
+
should "Return single and multiple values in arrays" do
|
14
|
+
doc = SolrInputDocument.new
|
15
|
+
doc << [:id, 1]
|
16
|
+
assert_equal [1], doc[:id]
|
17
|
+
doc << [:id, 2]
|
18
|
+
assert_equal [1,2], doc[:id]
|
19
|
+
end
|
20
|
+
|
21
|
+
should "Add items in hash via merge!" do
|
22
|
+
doc = SolrInputDocument.new
|
23
|
+
doc << [:id, 1]
|
24
|
+
h = {:id => 2, :name => 'Bill'}
|
25
|
+
doc.merge! h
|
26
|
+
assert_equal [1,2], doc[:id]
|
27
|
+
assert_equal ['Bill'], doc[:name]
|
28
|
+
end
|
29
|
+
|
30
|
+
should "Destroy existing items via []=" do
|
31
|
+
doc = SolrInputDocument.new
|
32
|
+
doc[:id] = 1
|
33
|
+
assert_equal [1], doc[:id]
|
34
|
+
doc[:id] = 2
|
35
|
+
assert_equal [2], doc[:id]
|
36
|
+
end
|
37
|
+
|
38
|
+
|
7
39
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jruby_streaming_update_solr_server
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bill Dueber
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-12 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -48,6 +48,12 @@ files:
|
|
48
48
|
- README.rdoc
|
49
49
|
- Rakefile
|
50
50
|
- VERSION
|
51
|
+
- jars/apache-solr-solrj-1.5-dev.jar
|
52
|
+
- jars/commons-codec-1.3.jar
|
53
|
+
- jars/commons-httpclient-3.1.jar
|
54
|
+
- jars/commons-logging-1.1.1.jar
|
55
|
+
- jars/slf4j-api-1.5.5.jar
|
56
|
+
- jars/slf4j-jdk14-1.5.5 2.jar
|
51
57
|
- lib/jruby_streaming_update_solr_server.rb
|
52
58
|
- test/helper.rb
|
53
59
|
- test/test_jruby_streaming_update_solr_server.rb
|