rsolr-async 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.rdoc +23 -0
- data/Rakefile +52 -0
- data/VERSION +1 -0
- data/examples/bulk_indexer.rb +65 -0
- data/lib/rsolr-async.rb +102 -0
- data/spec/rsolr-async_spec.rb +69 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +9 -0
- metadata +110 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Matt Mitchell
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
= rsolr-async
|
2
|
+
rsolr-async enhances the RSolr core library by adding the ability to use EventMachine as a connection adapter.
|
3
|
+
|
4
|
+
== Requirements
|
5
|
+
Ruby 1.9 or later is required (the adapter relies on Fibers), as well as the em-http-request and eventmachine gems.
|
6
|
+
|
7
|
+
== How
|
8
|
+
Just pass :async as the first argument to the RSolr.connect method:
|
9
|
+
require 'rsolr-async'
|
10
|
+
rsolr = RSolr.connect(:async, :url => 'http://localhost:8983/solr')
|
11
|
+
|
12
|
+
== Note on Patches/Pull Requests
|
13
|
+
|
14
|
+
* Fork the project.
|
15
|
+
* Make your feature addition or bug fix.
|
16
|
+
* Add tests for it. This is important so I don't break it in a future version unintentionally.
|
17
|
+
* Commit, but do not mess with the Rakefile, version, or history.
|
18
|
+
(If you want to have your own version, that is fine, but bump the version in a commit by itself that I can ignore when I pull.)
|
19
|
+
* Send me a pull request. Bonus points for topic branches.
|
20
|
+
|
21
|
+
== Copyright
|
22
|
+
|
23
|
+
Copyright (c) 2010 Matt Mitchell. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
# Gem packaging tasks. Jeweler is optional: when it cannot be loaded we only
# print a hint and carry on, so the spec/rdoc tasks below remain usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "rsolr-async"
    gem.summary = %Q{An EventMachine based connection adapter for RSolr}
    gem.description = %Q{Provides asynchronous connections to Solr}
    gem.email = "goodieboy@gmail.com"
    gem.homepage = "http://github.com/mwmitchell/rsolr-async"
    gem.authors = ["Matt Mitchell", "Mike Perham"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    gem.add_dependency "rsolr", ">= 0.12.1"
    gem.add_dependency "eventmachine", ">= 0.12.10"
    gem.add_dependency "em-http-request", ">= 0.2.6"

    gem.files = FileList['lib/**/*.rb', 'examples/**', 'LICENSE', 'README.rdoc', 'VERSION']
    gem.test_files = ['spec/*', 'Rakefile', 'solr/example/**/*']

    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  # Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Runs every *_spec.rb under spec/.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# Same specs, with rcov coverage enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably it
# is one of the tasks Jeweler::Tasks registers). Only depend on it when it
# exists, otherwise `rake spec` would abort whenever Jeweler failed to load.
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip: the VERSION file ends with a newline that must not leak into the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "rsolr-async #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'benchmark'
|
2
|
+
require 'rsolr'
|
3
|
+
require 'rsolr-async'
|
4
|
+
|
5
|
+
# Bulk load a number of documents into Solr.
|
6
|
+
# Every add is sent with :commitWithin => 60000, asking Solr to commit the change within a minute.
|
7
|
+
class BulkIndexer

  # Sends every document in +docs+ to Solr, one update request per document.
  #
  # docs:: an Array of Hashes. The hashes themselves are left untouched; a
  #        copy carrying a generated :id (the current time as a float
  #        string) is what gets added.
  #
  # Returns the Benchmark::Tms measured around the whole loop.
  # Raises RuntimeError, with the response as its message, as soon as a
  # response does not match /'status'=>0/.
  def index(docs)
    puts "indexing #{docs.size} docs"
    Benchmark.measure do
      docs.each do |doc|
        # merge, not merge!: the caller's hash may be shared by many array
        # slots (e.g. Array.new(n, doc)) and must not be modified in place.
        payload = doc.merge(:id => Time.now.to_f.to_s)
        result = rsolr.update(rsolr.message.add(payload, :commitWithin => 60000), :wt => 'ruby')
        raise RuntimeError, result if result !~ /'status'=>0/
      end
    end
  end

  private

  # Builds the async RSolr client on first use and memoizes it in @solr.
  def rsolr
    @solr ||= RSolr.connect(:async, :url => 'http://localhost:8983/solr')
  end
end
|
26
|
+
|
27
|
+
# Use like so:
|
28
|
+
#
|
29
|
+
# ruby -Ilib examples/bulk_indexer.rb
|
30
|
+
#
|
31
|
+
# You will need to change doc to reflect the fields in your Solr's schema.xml.
|
32
|
+
doc = {
  'content' => <<-EOS
Teams have doubled the number of scouts on the payroll over the last two decades to an average of 12 while their marketing departments, which are charged with keeping the revenue rolling in, have tripled in size to about 20 employees each. Moreover, teams have spent millions on training facilities, video-editing technology, nutritionists, specialized catering services and high-tech meeting rooms.

Indianapolis linebackers coach Mike Murphy, who has been an NFL coach for more than two decades, says he remembers when computers, introduced in the 1990s, first eliminated the hours coaches used to spend splicing game film. But instead of going home at a reasonable hour, he says, coaches started working more. "We're so paranoid that we explore every possibility, every angle," he says. "You have so much information that you can confuse yourself. You can go nuts."
  EOS
}
docs = Array.new(1000, doc)

EM.run do
  # We create one fiber per batch (at most +concurrency+ of them), which
  # allows us to run that many I/O operations in parallel. In practice, you
  # won't see a huge speedup if Solr is running on localhost. On my machine:
  # Indexing done in 4.698831 seconds with 1 concurrency
  # Indexing done in 3.898857 seconds with 2 concurrency
  # Indexing done in 3.520972 seconds with 5 concurrency

  started_at = Time.now
  concurrency = 5

  # Derive the batch size from the actual document count and round up, so no
  # document is dropped when the count is not a multiple of +concurrency+.
  batch_size = [(docs.size / concurrency.to_f).ceil, 1].max
  batches = docs.each_slice(batch_size).to_a
  completed = 0

  # Nothing to index: stop right away instead of leaving the reactor running.
  EM.stop if batches.empty?

  batches.each do |batch|

    Fiber.new do
      BulkIndexer.new.index(batch)
      completed += 1
      # The last fiber to finish reports the timing and shuts the reactor down.
      if completed == batches.size
        puts "Indexing done in #{Time.now - started_at} seconds with #{concurrency} concurrency"
        EM.stop
      end
    end.resume

  end
end
|
data/lib/rsolr-async.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
raise RuntimeError, "EventMachine connection requires Ruby 1.9" if RUBY_VERSION < '1.9'
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'rsolr'
|
5
|
+
require 'em-http'
|
6
|
+
require 'fiber'
|
7
|
+
|
8
|
+
#
|
9
|
+
# Connection for EventMachine
|
10
|
+
#
|
11
|
+
module RSolr::Async

  # Extends RSolr (see the RSolr.extend call below) so that
  # RSolr.connect(:async, ...) returns a client backed by
  # RSolr::Async::Connection. Any other argument list is handed to the
  # connect implementation further up the ancestor chain.
  module Connectable

    # args:: when the first element is :async, the remaining elements are
    #        passed to RSolr::Async::Connection.new.
    #
    # With a block, yields the new client and returns the block's value;
    # without one, returns the client.
    def connect *args, &blk
      return super(*args, &blk) unless args.first == :async
      rsolr = RSolr::Client.new(RSolr::Async::Connection.new(*args[1..-1]))
      block_given? ? (yield rsolr) : rsolr
    end

  end

  RSolr.extend RSolr::Async::Connectable

  #
  # Evented Connection for standard HTTP Solr server
  #
  class Connection

    include RSolr::Connection::Requestable

    # Class used to issue the HTTP requests.
    REQUEST_CLASS = EM::HttpRequest

    protected

    # Builds a request object for +path+ appended to the connection's URI.
    def connection path
      REQUEST_CLASS.new("#{@uri.to_s}#{path}")
    end

    # Request timeout taken from the :timeout option, defaulting to 5.
    # NOTE(review): +opts+ is not defined in this file; presumably it comes
    # from RSolr::Connection::Requestable.
    def timeout
      opts[:timeout] || 5
    end

    # Issues a GET for +path+ with +params+ as the query, suspends the calling
    # fiber until the request succeeds or fails, and returns the context hash
    # built by #create_http_context.
    def get path, params={}
      http_response = self.connection(path).get :query => params, :timeout => timeout
      create_http_context(wait_for(http_response), path, params)
    end

    # Same as #get, but POSTs +data+ as the body with +headers+.
    def post path, data, params={}, headers={}
      http_response = self.connection(path).post :query => params, :body => data, :head => headers, :timeout => timeout
      create_http_context(wait_for(http_response), path, params, data, headers)
    end

    # Suspends the current fiber until +http_response+ fires its callback or
    # errback, then returns +http_response+.
    #
    # this yield/resume business is complicated by em-http's mocking support which
    # yields to the callback immediately rather than from another fiber.
    # In that case the handler runs inside the calling fiber before we reach
    # Fiber.yield, so +yielding+ is already false and we neither yield nor
    # resume.
    def wait_for http_response
      yielding = true
      fiber = Fiber.current
      # |*_ignored|: accept whatever arguments the deferrable passes, exactly
      # as the plain blocks used before did.
      finished = lambda do |*_ignored|
        yielding = false
        fiber.resume if Fiber.current != fiber
      end
      http_response.callback(&finished)
      http_response.errback(&finished)
      Fiber.yield if yielding
      http_response
    end

    # Packs the finished response and the request details into the hash
    # returned by #get and #post.
    def create_http_context http_response, path, params, data=nil, headers={}
      full_url = "#{@uri.to_s}#{path}"
      {
        :status_code=>http_response.response_header.status,
        :url=>full_url,
        :body=>encode_utf8(http_response.response),
        :path=>path,
        :params=>params,
        :data=>data,
        :headers=>headers,
      }
    end

    # accepts a path/string and optional hash of query params
    def build_url path, params={}
      full_path = @uri.path + path
      super full_path, params, @uri.query
    end

  end

end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
require 'em-http'
|
4
|
+
require 'em-http/mock'
|
5
|
+
|
6
|
+
describe RSolr::Async do

  context 'initialization' do

    it 'should modifiy RSolr' do
      RSolr.should be_a(RSolr::Async::Connectable)
    end

    it 'should not change the default connect behavior' do
      RSolr.connect.connection.should be_a(RSolr::Connection::NetHttp)
    end

    it 'should create an instance of RSolr::Async::Connection when :async is used' do
      RSolr.connect(:async, :url=>'http://localhost:8983/solr').connection.should be_a(RSolr::Async::Connection)
    end

  end

  # Route every request made by the connection through em-http's mock so the
  # examples below never touch the network.
  RSolr::Async::Connection::REQUEST_CLASS = EM::MockHttpRequest

  let(:new_connection){ RSolr::Async::Connection.new }

  context '#request' do
    it 'should forward simple, non-data calls to #get' do
      failure = nil
      completed = false

      EM.run do

        # Safety net: never let a hung request keep the reactor alive.
        EM.add_timer(1) do
          EM.stop
        end

        EM::MockHttpRequest.pass_through_requests = false
        body = <<-EOM
HTTP/1.1 200 OK
Date: Mon, 16 Nov 2009 20:39:15 GMT
Expires: -1
Cache-Control: private, max-age=0
Content-Type: text/xml; charset=utf-8
Connection: close

<?xml version="1.0" encoding="UTF-8"?>
<response>
<lst name="responseHeader"><int name="status">0</int><int name="QTime">1</int><lst name="params"><str name="q">a</str></lst></lst><result name="response" numFound="0" start="0"/>
</response>
        EOM
        EM::MockHttpRequest.register 'http://127.0.0.1:8983/solr/select?q=a', :get, body

        Fiber.new do
          begin
            http = new_connection
            resp = http.request('/select', :q=>'a')
            resp[:status_code].should == 200
            completed = true
          rescue Exception => ex
            # Remember the failure instead of printing it, so it can be
            # re-raised once the reactor has stopped. Exception (not just
            # StandardError) is rescued, as before, so that nothing raised in
            # the fiber is lost.
            failure = ex
          ensure
            EM.stop
          end
        end.resume
      end

      # Surface anything that went wrong inside the fiber; previously it was
      # printed and the example still passed.
      raise failure if failure
      # Guards against the safety timer ending the run before the request did.
      completed.should == true
    end
  end

end
|
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rsolr-async
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.1.0
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Matt Mitchell
|
9
|
+
- Mike Perham
|
10
|
+
autorequire:
|
11
|
+
bindir: bin
|
12
|
+
cert_chain: []
|
13
|
+
|
14
|
+
date: 2010-02-04 00:00:00 Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rspec
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ">="
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 1.2.9
|
25
|
+
type: :development
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rsolr
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 0.12.1
|
36
|
+
type: :runtime
|
37
|
+
version_requirements: *id002
|
38
|
+
- !ruby/object:Gem::Dependency
|
39
|
+
name: eventmachine
|
40
|
+
prerelease: false
|
41
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 0.12.10
|
47
|
+
type: :runtime
|
48
|
+
version_requirements: *id003
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: em-http-request
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: 0.2.6
|
58
|
+
type: :runtime
|
59
|
+
version_requirements: *id004
|
60
|
+
description: Provides asynchronous connections to Solr
|
61
|
+
email: goodieboy@gmail.com
|
62
|
+
executables: []
|
63
|
+
|
64
|
+
extensions: []
|
65
|
+
|
66
|
+
extra_rdoc_files:
|
67
|
+
- LICENSE
|
68
|
+
- README.rdoc
|
69
|
+
files:
|
70
|
+
- LICENSE
|
71
|
+
- README.rdoc
|
72
|
+
- VERSION
|
73
|
+
- examples/bulk_indexer.rb
|
74
|
+
- lib/rsolr-async.rb
|
75
|
+
- spec/rsolr-async_spec.rb
|
76
|
+
- spec/spec.opts
|
77
|
+
- spec/spec_helper.rb
|
78
|
+
- Rakefile
|
79
|
+
homepage: http://github.com/mwmitchell/rsolr-async
|
80
|
+
licenses: []
|
81
|
+
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options:
|
84
|
+
- --charset=UTF-8
|
85
|
+
require_paths:
|
86
|
+
- lib
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: "0"
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ">="
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: "0"
|
99
|
+
requirements: []
|
100
|
+
|
101
|
+
rubyforge_project:
|
102
|
+
rubygems_version: 1.8.6.1
|
103
|
+
signing_key:
|
104
|
+
specification_version: 3
|
105
|
+
summary: An EventMachine based connection adapter for RSolr
|
106
|
+
test_files:
|
107
|
+
- spec/rsolr-async_spec.rb
|
108
|
+
- spec/spec.opts
|
109
|
+
- spec/spec_helper.rb
|
110
|
+
- Rakefile
|