jruby-elasticsearch 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,11 +3,12 @@ require "jruby-elasticsearch/request"
3
3
 
4
4
  class ElasticSearch::BulkRequest < ElasticSearch::Request
5
5
  # Create a new bulk request.
6
+ public
6
7
  def initialize(client)
7
8
  @client = client
8
9
  @prep = @client.prepareBulk()
9
10
  super()
10
- end
11
+ end # def initialize
11
12
 
12
13
  # Execute this bulk request.
13
14
  # This call is asynchronous.
@@ -17,13 +18,16 @@ class ElasticSearch::BulkRequest < ElasticSearch::Request
17
18
  use_callback(&block) if block_given?
18
19
  action = @prep.execute(@handler)
19
20
  return action
20
- end
21
+ end # def execute
21
22
 
22
23
  # Execute this bulk request synchronously.
24
+ public
23
25
  def execute!
24
26
  return @prep.execute.actionGet()
25
- end
27
+ end # def execute!
26
28
 
29
+ # Index a document.
30
+ public
27
31
  def index(index, type, id=nil, data={})
28
32
  req = org.elasticsearch.action.index.IndexRequest.new(index)
29
33
  req.type(type) if type
@@ -32,6 +36,7 @@ class ElasticSearch::BulkRequest < ElasticSearch::Request
32
36
  @prep.add(req)
33
37
  end
34
38
 
39
+ public
35
40
  def <<(request)
36
41
  @prep.add(request)
37
42
  end # def <<
@@ -1,7 +1,80 @@
1
1
  require "jruby-elasticsearch/namespace"
2
+ require "thread"
2
3
 
3
4
  class ElasticSearch::BulkStream
4
- def initialize
5
- @bulkthread = Thread.new
6
- end
7
- end
5
+ # Create a new bulk stream. This allows you to send
6
+ # index and other bulk events asynchronously and use
7
+ # the bulk api in ElasticSearch in a streaming way.
8
+ #
9
+ # 'queue_size' is the maximum number of unflushed requests.
10
+ # When the queue is full, new requests block until there is room.
11
+ # 'flush_interval' is the number of seconds to sleep between flushes of a non-full queue.
12
+ def initialize(client, queue_size=10, flush_interval=1)
13
+ @bulkthread = Thread.new { run }
14
+ @client = client
15
+ @queue_size = queue_size
16
+ @queue = SizedQueue.new(@queue_size)
17
+ @flush_interval = flush_interval
18
+ end # def initialize
19
+
20
+ # See ElasticSearch::BulkRequest#index for arguments.
21
+ public
22
+ def index(*args)
23
+ # TODO(sissel): It's not clear I need to queue this up, I could just
24
+ # call BulkRequest#index() and when we have 10 or whatnot, flush, but
25
+ # Queue gives us a nice blocking mechanism anyway.
26
+ @queue << [:index, *args]
27
+ end # def index
28
+
29
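+ # The stream runner: flush when the queue is full, otherwise sleep for flush_interval and then flush; exit once stopped and the queue is empty.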
30
+ private
31
+ def run
32
+ # TODO(sissel): Make a way to shutdown this thread.
33
+ while true
34
+ requests = []
35
+ if @queue.size == @queue_size
36
+ # queue full, flush now.
37
+ flush
38
+ else
39
+ # Not full, so sleep and flush anyway.
40
+ sleep(@flush_interval)
41
+ flush
42
+ end
43
+
44
+ if @stop and @queue.size == 0
45
+ # Queue empty and it's time to stop.
46
+ break
47
+ end
48
+ end # while true
49
+ end # def run
50
+
51
+ # Stop the stream
52
+ public
53
+ def stop
54
+ @queue << nil
55
+ @stop = true
56
+ end # def stop
57
+
58
+ # Flush the queue right now. This will block until the
59
+ # bulk request has completed.
60
+ public
61
+ def flush
62
+ bulk = @client.bulk
63
+
64
+ flush_one = proc do
65
+ # block if no data.
66
+ method, *args = @queue.pop
67
+ return if args.nil? # probably we are now stopping.
68
+ bulk.send(method, *args)
69
+ end
70
+
71
+ flush_one.call
72
+
73
+ 1.upto([@queue.size, @queue_size - 1].min) do
74
+ flush_one.call
75
+ end
76
+
77
+ # Block until this finishes
78
+ bulk.execute!
79
+ end # def flush
80
+ end # class ElasticSearch::BulkStream
@@ -53,7 +53,12 @@ class ElasticSearch::Client
53
53
  # request.
54
54
  public
55
55
  def bulk
56
- ElasticSearch::BulkRequest.new(@client)
56
+ return ElasticSearch::BulkRequest.new(@client)
57
+ end # def bulk
58
+
59
+ public
60
+ def bulkstream(queue_size=10, flush_interval=1)
61
+ return ElasticSearch::BulkStream.new(self, queue_size, flush_interval)
57
62
  end # def bulk
58
63
 
59
64
  # Index a new document
@@ -1,9 +1,5 @@
1
1
  require "jruby-elasticsearch/client"
2
2
  require "jruby-elasticsearch/bulkrequest"
3
-
4
- #class Proc
5
- #include java.lang.Runnable
6
- #alias_method :run, :call
7
- #end
3
+ require "jruby-elasticsearch/bulkstream"
8
4
 
9
5
  module ElasticSearch; end;
@@ -4,11 +4,17 @@ class TestElasticSearch < Test::Unit::TestCase
4
4
  def setup
5
5
  # Require all the elasticsearch libs
6
6
  raise "Please set ELASTICSEARCH_HOME" if ENV['ELASTICSEARCH_HOME'].nil?
7
- Dir[File.join(ENV['ELASTICSEARCH_HOME'],"lib/*.jar")].each do |jar|
7
+
8
+ dir = File.join(ENV["ELASTICSEARCH_HOME"], "lib")
9
+ if !File.directory?(dir)
10
+ raise "ELASTICSEARCH_HOME set, but #{dir} doesn't exist"
11
+ end
12
+
13
+ Dir.glob(File.join(dir, "*.jar")).each do |jar|
8
14
  require jar
9
15
  end
10
16
 
11
- $:.unshift("lib")
17
+ $:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
12
18
  require "jruby-elasticsearch"
13
19
 
14
20
  # Start a local elasticsearch node
@@ -69,4 +75,34 @@ class TestElasticSearch < Test::Unit::TestCase
69
75
  sleep 1
70
76
  end
71
77
  end
72
- end
78
+
79
+ def test_bulk_stream_synchronous
80
+ stream = @client.bulkstream(10)
81
+ tries = 10
82
+ entries = 30
83
+ 1.upto(entries) do |i|
84
+ stream.index("hello", "world", { "foo" => "bar", "i" => i })
85
+ end
86
+ stream.stop
87
+
88
+ found = false
89
+ 1.upto(tries) do
90
+ search = @client.search do
91
+ index "hello"
92
+ query "*"
93
+ end
94
+ # Results is an org.elasticsearch.action.search.SearchResponse
95
+ results = search.execute!
96
+ count = results.hits.totalHits
97
+
98
+ if count == entries
99
+ # every entry is searchable; record success and stop polling
100
+ found = true
101
+ break
102
+ end
103
+ sleep 0.2
104
+ end # try a bunch to find our results
105
+
106
+ assert(found, "Search results were not found.")
107
+ end # def test_bulk_stream_synchronous
108
+ end # class TestElasticSearch
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: jruby-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.11
5
+ version: 0.0.12
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jordan Sissel
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-10-11 00:00:00 -07:00
13
+ date: 2011-10-14 00:00:00 -07:00
14
14
  default_executable:
15
15
  dependencies: []
16
16