jruby_streaming_update_solr_server 0.0.0 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +20 -11
- data/VERSION +1 -1
- data/jars/apache-solr-solrj-1.5-dev.jar +0 -0
- data/jars/commons-codec-1.3.jar +0 -0
- data/jars/commons-httpclient-3.1.jar +0 -0
- data/jars/commons-logging-1.1.1.jar +0 -0
- data/jars/slf4j-api-1.5.5.jar +0 -0
- data/jars/slf4j-jdk14-1.5.5 2.jar +0 -0
- data/lib/jruby_streaming_update_solr_server.rb +147 -9
- data/test/test_jruby_streaming_update_solr_server.rb +34 -2
- metadata +8 -2
data/README.rdoc
CHANGED
@@ -1,16 +1,25 @@
|
|
1
1
|
= jruby_streaming_update_solr_server
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
3
|
+
Some syntactic sugar on top of the StreamingUpdateSolrServer as provided by the Apache
|
4
|
+
Solr project, along with its most common argument, the SolrInputDocument.
|
5
|
+
|
6
|
+
Documentation can be generated with yard via the 'rake yard' action.
|
7
|
+
|
8
|
+
See the class files for more information, documentation, and examples.
|
9
|
+
|
10
|
+
== What is this again?
|
11
|
+
|
12
|
+
For users of Solr, there is a java class called StreamingUpdateSolrServer that allows you to easily
|
13
|
+
(and in a threaded manner) add documents (of class SolrInputDocument)
|
14
|
+
to a queue and have them automatically sent along to Solr by a user-configurable number of threads.
|
15
|
+
|
16
|
+
These classes open up those Java classes to add some JRuby sugar and make it easy for you to
|
17
|
+
construct Solr documents and send them to Solr all from within the comfort of the JRuby environment.
|
18
|
+
|
19
|
+
== WARNING: Program hangs on for 60 seconds!
|
20
|
+
|
21
|
+
*WARNING*: After your program ends, the StreamingUpdateSolrServer object hangs on for a full minute, presumably
|
22
|
+
the timeout of the underlying http client object. I haven't figured out how to change this yet.
|
14
23
|
|
15
24
|
== Copyright
|
16
25
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.1.0
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -1,9 +1,88 @@
|
|
1
|
+
if not defined? JRUBY_VERSION
|
2
|
+
raise "jruby streaming update solr server only runs under jruby"
|
3
|
+
end
|
4
|
+
|
5
|
+
# Load .jar files locally if they haven't already been pulled in.
|
6
|
+
begin
|
7
|
+
include_class Java::OrgApacheSolrClientSolrjImpl::StreamingUpdateSolrServer
|
8
|
+
include_class Java::OrgApacheSolrCommon::SolrInputDocument
|
9
|
+
rescue NameError => e
|
10
|
+
jardir = File.join(File.dirname(__FILE__), '..', 'jars')
|
11
|
+
Dir.glob("#{jardir}/*.jar") do |x|
|
12
|
+
require x
|
13
|
+
end
|
14
|
+
retry
|
15
|
+
end
|
1
16
|
|
2
|
-
|
3
|
-
|
17
|
+
# Sugar on top of the org.apache.solr.client.solrj.impl.StreamingUpdateSolrServer
|
18
|
+
#
|
19
|
+
# Note that several important methods, new and commit, are direct from the java and hence
|
20
|
+
# not represented here where I'm just opening up the class to add some sugar. Full documentation
|
21
|
+
# for the raw java methods is available at
|
22
|
+
# http://lucene.apache.org/solr/api/org/apache/solr/client/solrj/impl/StreamingUpdateSolrServer.html
|
23
|
+
#
|
24
|
+
# A quick look at important java methods you can call:
|
25
|
+
#
|
26
|
+
# <b>suss = StreamingUpdateSolrServer.new(solrURL, queueSize, numberOfThreads)</b>
|
27
|
+
# The constructor.
|
28
|
+
#
|
29
|
+
# [String] solrURL The URL to your solr instance (i.e., http://solr-machine:port/solr)
|
30
|
+
# [Integer] queueSize The size of the queue from which consumer threads will pull
|
31
|
+
# documents ready to be added to Solr and actually do the sending.
|
32
|
+
# [Integer] numberOfThreads The number of consumer threads to do the sending-to-Solr
|
33
|
+
#
|
34
|
+
# <b>suss.commit</b>
|
35
|
+
# Send the commit to the solr server
|
36
|
+
#
|
37
|
+
# <b>suss.optimize</b>
|
38
|
+
# Send the optimize command to the Solr server
|
39
|
+
#
|
40
|
+
# <b>suss.deleteById(id)</b>
|
41
|
+
#
|
42
|
+
# <b>suss.deleteById([id1, id2, id3, ...])</b>
|
43
|
+
# Delete the given ID or IDs
|
44
|
+
#
|
45
|
+
# <b>suss.deleteByQuery(query)</b>
|
46
|
+
# Delete everything that matches +query+
|
47
|
+
# [String] query A valid solr query. Everything that matches will be deleted. So, you can ditch
|
48
|
+
# it all by sending, e.g., '*:*'
|
49
|
+
#
|
50
|
+
# @author Bill Dueber
|
4
51
|
|
5
52
|
class StreamingUpdateSolrServer
|
6
|
-
|
53
|
+
|
54
|
+
# Hang onto the java #add for internal use
|
55
|
+
alias_method :sussadd, :add
|
56
|
+
|
57
|
+
# Add a document to the SUSS
|
58
|
+
# @param [SolrInputDocument, #each_pair] doc The SolrInputDocument or hash (or hash-like object
|
59
|
+
# that responds to #each_pair) to add. The latter must be of the form solrfield => value or
|
60
|
+
# solrfield => [list, of, values]. The keys can be either symbols or strings.
|
61
|
+
#
|
62
|
+
# @example Create and add a SolrInputDocument
|
63
|
+
# url = 'http://solrmachine:port/solr' # URL to solr
|
64
|
+
# queuesize = 10 # Size of producer cache
|
65
|
+
# threads = 2 # Number of consumer threads to push docs from queue to solr
|
66
|
+
#
|
67
|
+
# suss = StreamingUpdateSolrServer.new(url,queuesize,threads)
|
68
|
+
#
|
69
|
+
# doc = SolrInputDocument.new
|
70
|
+
# doc << ['title', 'This is the title']
|
71
|
+
# doc << ['id', 1]
|
72
|
+
# suss.add doc # or suss << doc
|
73
|
+
# # repeat as desired
|
74
|
+
# suss.commit
|
75
|
+
#
|
76
|
+
# @example Create and add as a hash
|
77
|
+
# # The "hash" just needs to be an object that responds to each_pair with field,value(s)
|
78
|
+
# suss = StreamingUpdateSolrServer.new(url,queuesize,threads)
|
79
|
+
# doc = {}
|
80
|
+
# doc['title'] = 'This is the title'
|
81
|
+
# doc[:id] = 1 # Can also take symbols instead of strings if you like
|
82
|
+
# doc[:author] = ['Bill', 'Mike']
|
83
|
+
# suss << doc
|
84
|
+
# # repeat as desired
|
85
|
+
# suss.commit
|
7
86
|
|
8
87
|
def add doc
|
9
88
|
if doc.is_a? org.apache.solr.common.SolrInputDocument
|
@@ -13,9 +92,9 @@ class StreamingUpdateSolrServer
|
|
13
92
|
doc.each_pair do |f,v|
|
14
93
|
newdoc << [f,v]
|
15
94
|
end
|
16
|
-
|
95
|
+
sussadd newdoc
|
17
96
|
else
|
18
|
-
|
97
|
+
raise ArgumentError "Need to pass either an org.apache.solr.common.SolrInputDocument or a hash"
|
19
98
|
end
|
20
99
|
end
|
21
100
|
|
@@ -23,8 +102,25 @@ class StreamingUpdateSolrServer
|
|
23
102
|
|
24
103
|
end
|
25
104
|
|
105
|
+
# Add some sugar to the SolrInputDocument
|
106
|
+
#
|
107
|
+
# @author Bill Dueber
|
26
108
|
|
27
109
|
class SolrInputDocument
|
110
|
+
|
111
|
+
# Add a field and value(s) to the document. This is strictly additive; nothing is replaced or removed
|
112
|
+
#
|
113
|
+
# @param [Array<Symbol, String>] fv A two-element array of the form [field, value] or [field, [value1, value2, ...]]
|
114
|
+
# The field name (i.e., fv[0]) is the Solr field name as specified in schema.xml and
|
115
|
+
# must be either a string or a symbol
|
116
|
+
# @return [Array<String>] the list of current values for the field in fv[0]
|
117
|
+
#
|
118
|
+
# @example Add some fields
|
119
|
+
# doc = SolrInputDocument.new
|
120
|
+
# doc << ['title', 'Mein Kopf'] #=> ['Mein Kopf']
|
121
|
+
# doc << ['title', 'My Head!'] #=> ['Mein Kopf', 'My Head!']
|
122
|
+
# doc << ['author', ['Bill', 'Mike', 'Molly']] #=> ['Bill', 'Mike', 'Molly']
|
123
|
+
|
28
124
|
def << fv
|
29
125
|
field = fv[0]
|
30
126
|
value = fv[1]
|
@@ -41,6 +137,14 @@ class SolrInputDocument
|
|
41
137
|
self[field]
|
42
138
|
end
|
43
139
|
|
140
|
+
|
141
|
+
# Get a list of the currently-set values for the passed field
|
142
|
+
#
|
143
|
+
# Note that this will always return either nil (not found) or an array, even of one element
|
144
|
+
#
|
145
|
+
# @param [String, Symbol] field The field whose values you want (as String or Symbol)
|
146
|
+
# @return [Array<String>] An array of values (or nil on not found)
|
147
|
+
#
|
44
148
|
def [] field
|
45
149
|
if field.is_a?(Symbol)
|
46
150
|
field = field.to_s
|
@@ -52,10 +156,25 @@ class SolrInputDocument
|
|
52
156
|
if v.class == Java::JavaUtil::ArrayList
|
53
157
|
return v.to_a
|
54
158
|
else
|
55
|
-
return v
|
159
|
+
return [v]
|
56
160
|
end
|
57
161
|
end
|
58
162
|
|
163
|
+
# Set the value(s) for the given field, destroying any values that were already in there
|
164
|
+
#
|
165
|
+
# Note that this is destructive; see #<< to add multiple values to a field
|
166
|
+
#
|
167
|
+
# @param [String, Symbol] field The solr field you're setting the value of
|
168
|
+
# @param [String, Array<String>] value The value or array of values to set
|
169
|
+
# @return [Array<String>] The list of values (i.e., either +value+ or +[value]+)
|
170
|
+
#
|
171
|
+
# @example
|
172
|
+
# doc = SolrInputDocument.new
|
173
|
+
# doc[:id] = 1 #=> [1]
|
174
|
+
# doc[:author] = 'Mike' #=> ['Mike']
|
175
|
+
# doc[:author] = 'Bill' #=> ['Bill']
|
176
|
+
# doc[:author] #=> ['Bill']
|
177
|
+
|
59
178
|
def []= field, value
|
60
179
|
if field.is_a?(Symbol)
|
61
180
|
field = field.to_s
|
@@ -64,8 +183,29 @@ class SolrInputDocument
|
|
64
183
|
self[field]
|
65
184
|
end
|
66
185
|
|
186
|
+
|
187
|
+
# Add keys and values from a hash or hash-like object to the document without removing any
|
188
|
+
# already-added values.
|
189
|
+
#
|
190
|
+
# @param [#each_pair] h A set of field=>value pairs, probably in a Hash. Can be either
|
191
|
+
# field=>value or field=>[list,of,values]
|
192
|
+
#
|
193
|
+
# @example Merge a hash into an existing document
|
194
|
+
# doc = SolrInputDocument.new
|
195
|
+
# doc << [:author, 'Bill']
|
196
|
+
# h = {}
|
197
|
+
# h['author'] = 'Mike'
|
198
|
+
# h['id'] = 1
|
199
|
+
# h[:copies] = ['Grad reference', 'Long-term storage']
|
200
|
+
# doc.merge! h
|
201
|
+
# doc[:id] #=> [1]
|
202
|
+
# doc[:author] #=> ['Bill', 'Mike']
|
203
|
+
# doc[:copies] #=> ['Grad reference', 'Long-term storage']
|
204
|
+
|
67
205
|
def merge! h
|
68
|
-
|
206
|
+
unless h.respond_to? :each_pair
|
207
|
+
raise ArgumentError, "Argument must respond to #each_pair"
|
208
|
+
end
|
69
209
|
h.each_pair do |k,v|
|
70
210
|
self << [k,v]
|
71
211
|
end
|
@@ -73,5 +213,3 @@ class SolrInputDocument
|
|
73
213
|
end
|
74
214
|
|
75
215
|
|
76
|
-
|
77
|
-
|
@@ -1,7 +1,39 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
3
|
class TestJrubyStreamingUpdateSolrServer < Test::Unit::TestCase
|
4
|
-
should "
|
5
|
-
|
4
|
+
should "Write some tests, but don't know how to mock up a solr server" do
|
5
|
+
assert_equal 1, 1
|
6
6
|
end
|
7
|
+
|
8
|
+
should "Report nil for a document that doesn't include a field" do
|
9
|
+
doc = SolrInputDocument.new
|
10
|
+
assert_equal nil, doc[:notinthere]
|
11
|
+
end
|
12
|
+
|
13
|
+
should "Return single and multiple values in arrays" do
|
14
|
+
doc = SolrInputDocument.new
|
15
|
+
doc << [:id, 1]
|
16
|
+
assert_equal [1], doc[:id]
|
17
|
+
doc << [:id, 2]
|
18
|
+
assert_equal [1,2], doc[:id]
|
19
|
+
end
|
20
|
+
|
21
|
+
should "Add items in hash via merge!" do
|
22
|
+
doc = SolrInputDocument.new
|
23
|
+
doc << [:id, 1]
|
24
|
+
h = {:id => 2, :name => 'Bill'}
|
25
|
+
doc.merge! h
|
26
|
+
assert_equal [1,2], doc[:id]
|
27
|
+
assert_equal ['Bill'], doc[:name]
|
28
|
+
end
|
29
|
+
|
30
|
+
should "Destroy existing items via []=" do
|
31
|
+
doc = SolrInputDocument.new
|
32
|
+
doc[:id] = 1
|
33
|
+
assert_equal [1], doc[:id]
|
34
|
+
doc[:id] = 2
|
35
|
+
assert_equal [2], doc[:id]
|
36
|
+
end
|
37
|
+
|
38
|
+
|
7
39
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jruby_streaming_update_solr_server
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bill Dueber
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-12 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -48,6 +48,12 @@ files:
|
|
48
48
|
- README.rdoc
|
49
49
|
- Rakefile
|
50
50
|
- VERSION
|
51
|
+
- jars/apache-solr-solrj-1.5-dev.jar
|
52
|
+
- jars/commons-codec-1.3.jar
|
53
|
+
- jars/commons-httpclient-3.1.jar
|
54
|
+
- jars/commons-logging-1.1.1.jar
|
55
|
+
- jars/slf4j-api-1.5.5.jar
|
56
|
+
- jars/slf4j-jdk14-1.5.5 2.jar
|
51
57
|
- lib/jruby_streaming_update_solr_server.rb
|
52
58
|
- test/helper.rb
|
53
59
|
- test/test_jruby_streaming_update_solr_server.rb
|