lusis-jruby-elasticsearch 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ require "jruby-elasticsearch/client"
2
+ require "jruby-elasticsearch/bulkrequest"
3
+ require "jruby-elasticsearch/bulkstream"
4
+
5
+ module ElasticSearch; end;
@@ -0,0 +1,48 @@
1
+ require "java"
2
+ require "jruby-elasticsearch/namespace"
3
+
4
# Adapts the Java org.elasticsearch.action.ActionListener interface to Ruby
# blocks. Blocks are registered with #on; the Java side then reports the
# outcome of an asynchronous action through #onResponse or #onFailure.
class ElasticSearch::ActionListener
  include org.elasticsearch.action.ActionListener

  def initialize
    @failure_callbacks = []
    @success_callbacks = []
  end # def initialize

  # Helper for registering callbacks.
  # 'what' should be either :failure or :success
  #
  # You can register multiple callbacks if you wish.
  # Callbacks are invoked in order of addition.
  #
  # Raises ArgumentError when no block is given: a nil callback would
  # otherwise be stored silently and only fail (with NoMethodError) once
  # the event fires.
  # Raises RuntimeError when 'what' is not a known event.
  #
  # Returns self so registrations can be chained.
  def on(what, &block)
    raise ArgumentError, "No block given for event '#{what}'" if block.nil?

    case what
    when :failure
      @failure_callbacks << block
    when :success
      @success_callbacks << block
    else
      raise "Unknown event '#{what}' for #{self.class.name}"
    end
    return self
  end # def on

  # Conforming to Interface org.elasticsearch.action.ActionListener
  #
  # Passes the exception to every registered :failure callback, in order.
  def onFailure(exception)
    if !@failure_callbacks.empty?
      @failure_callbacks.each { |c| c.call(exception) }
    else
      # Default if no failure callbacks: re-raise so the error is not lost.
      raise exception
    end
  end # def onFailure

  # Conforming to Interface org.elasticsearch.action.ActionListener
  #
  # Passes the response to every registered :success callback, in order.
  def onResponse(response)
    if !@success_callbacks.empty?
      @success_callbacks.each { |c| c.call(response) }
    else
      # Default if no success callbacks: print the response to stdout.
      puts "#{self.class.name}#onResponse => #{response.inspect} (#{self})"
    end
  end # def onResponse
end # class ElasticSearch::ActionListener
@@ -0,0 +1,43 @@
1
+ require "jruby-elasticsearch/namespace"
2
+ require "jruby-elasticsearch/request"
3
+
4
# Collects several requests and sends them to elasticsearch as one bulk
# request, built on the builder returned by the client's prepareBulk.
class ElasticSearch::BulkRequest < ElasticSearch::Request
  # Create a new bulk request.
  #
  # client: the Java elasticsearch client; prepareBulk is called on it.
  public
  def initialize(client)
    @client = client
    @prep = @client.prepareBulk()
    super()
  end # def initialize

  # Execute this bulk request.
  # This call is asynchronous.
  #
  # If a block is given, register it for both failure and success.
  #
  # Returns whatever the underlying builder's execute returns.
  def execute(&block)
    use_callback(&block) if block_given?
    action = @prep.execute(@handler)
    return action
  end # def execute

  # Execute this bulk request synchronously.
  #
  # Blocks in actionGet() and returns its result.
  public
  def execute!
    return @prep.execute.actionGet()
  end # def execute!

  # Add an index operation for one document to this bulk request.
  #
  # args:
  #   index: the index name
  #   type: the type name (skipped when nil)
  #   id: (optional) the id of the document; converted with to_s
  #   data: (optional) the document source
  #
  # As with ElasticSearch::Client#index, 'id' may be omitted entirely, so
  # index("foo", "bar", somehash) treats the hash as the document data.
  # Previously the hash was used as the id and an empty source was sent.
  public
  def index(index, type, id=nil, data={})
    if id.is_a?(Hash)
      data = id
      id = nil
    end

    req = org.elasticsearch.action.index.IndexRequest.new(index)
    req.type(type) if type
    req.id(id.to_s) if id
    req.source(data)
    @prep.add(req)
  end # def index

  # Add an already-built request object to this bulk request.
  public
  def <<(request)
    @prep.add(request)
  end # def <<
end # class ElasticSearch::BulkRequest
@@ -0,0 +1,80 @@
1
+ require "jruby-elasticsearch/namespace"
2
+ require "thread"
3
+
4
# Streams bulk operations to elasticsearch. Operations are put on a bounded
# queue; a background thread drains the queue into bulk requests and
# executes them.
class ElasticSearch::BulkStream
  # Create a new bulk stream. This allows you to send
  # index and other bulk events asynchronously and use
  # the bulk api in ElasticSearch in a streaming way.
  #
  # The 'queue_size' is the maximum size of unflushed
  # requests. If the queue reaches this size, new requests
  # will block until there is room to move.
  #
  # The 'flush_interval' is how long (passed to sleep) the background
  # thread waits before flushing when the queue is not full.
  #
  # 'client' must respond to #bulk (see ElasticSearch::Client#bulk).
  def initialize(client, queue_size=10, flush_interval=1)
    @client = client
    @queue_size = queue_size
    @queue = SizedQueue.new(@queue_size)
    @flush_interval = flush_interval
    @stop = false

    # Start the worker last: it reads the instance variables above, so
    # starting it first could let it run against a nil queue.
    @bulkthread = Thread.new { run }
  end # def initialize

  # Queue an index operation.
  # See ElasticSearch::BulkRequest#index for arguments.
  #
  # Blocks while the queue is full.
  public
  def index(*args)
    # TODO(sissel): It's not clear I need to queue this up, I could just
    # call BulkRequest#index() and when we have 10 or whatnot, flush, but
    # Queue gives us a nice blocking mechanism anyway.
    @queue << [:index, *args]
  end # def index

  # Stop the stream.
  #
  # Items queued before this call are still flushed by the background
  # thread before it exits.
  public
  def stop
    # Raise the flag before queueing the sentinel. Otherwise the worker
    # could consume the sentinel, see @stop still unset, loop again and
    # then block forever on the empty queue.
    @stop = true
    @queue << nil
    return true
  end # def stop

  # Flush the queue right now. This will block until the
  # bulk request has completed.
  #
  # Waits for at least one queued item, then also takes the items that were
  # already waiting at that moment (at most queue_size items in total).
  # A nil item is the stop sentinel and ends the batch early; whatever was
  # collected before it is still sent.
  #
  # Returns the result of BulkRequest#execute!, or nil when there was
  # nothing to send.
  public
  def flush
    bulk = @client.bulk
    added = 0

    # Block if no data.
    item = @queue.pop
    budget = [@queue.size, @queue_size - 1].min

    until item.nil?
      action, *args = item
      bulk.send(action, *args)
      added += 1
      break if budget.zero?
      budget -= 1
      item = @queue.pop
    end

    # Do not execute a bulk request that has no operations in it.
    return nil if added.zero?

    # Block until this finishes
    return bulk.execute!
  end # def flush

  # The stream runner; this is the body of the background thread.
  private
  def run
    loop do
      # Flush immediately when the queue is full; otherwise wait one
      # interval and flush whatever has arrived.
      sleep(@flush_interval) unless @queue.size == @queue_size
      flush

      # Queue empty and it's time to stop.
      break if @stop && @queue.empty?
    end
  end # def run
end # class ElasticSearch::BulkStream
@@ -0,0 +1,126 @@
1
+ require "java"
2
+ require "jruby-elasticsearch/namespace"
3
+ require "jruby-elasticsearch/indexrequest"
4
+ require "jruby-elasticsearch/searchrequest"
5
+
6
# Entry point of the library: starts an elasticsearch client node and hands
# out request objects (index, search, bulk) bound to it.
class ElasticSearch::Client

  # Creates a new ElasticSearch client.
  #
  # options:
  # :type => :local - create a process-local elasticsearch node.
  #   Any other value (or none) creates a client node configured by the
  #   options below. :transport is not supported yet and raises
  #   ArgumentError.
  # :host => "hostname" - the hostname to connect to; when given, multicast
  #   discovery is disabled and this host is used for unicast discovery.
  # :port => 9300 - the port to connect to (only used together with :host)
  # :bind_host => "address" - value for the 'network.host' setting
  # :cluster => "clustername" - the cluster name to use
  def initialize(options={})
    builder = org.elasticsearch.node.NodeBuilder.nodeBuilder
    builder.client(true)

    # The client doesn't need to serve http
    builder.settings.put("http.enabled", false)

    case options[:type]
    when :local
      builder.local(true)
    when :transport
      # TODO(sissel): Support transport client
      # Fail here instead of returning an object whose @client is nil and
      # whose every method would raise NoMethodError later.
      raise ArgumentError, "The :transport client type is not supported yet"
    else
      # Use unicast discovery if a host is given
      if !options[:host].nil?
        port = options[:port] || "9300"
        builder.settings.put("discovery.zen.ping.multicast.enabled", false)
        builder.settings.put("discovery.zen.ping.unicast.hosts", "#{options[:host]}:#{port}")
        #builder.settings.put("es.transport.tcp.port", port)
      end

      if options[:bind_host]
        builder.settings.put('network.host', options[:bind_host])
      end

      if !options[:cluster].nil?
        builder.clusterName(options[:cluster])
      end
    end

    @node = builder.node
    @client = @node.client
  end # def initialize

  # Get a new BulkRequest for sending multiple updates to elasticsearch in one
  # request.
  public
  def bulk
    return ElasticSearch::BulkRequest.new(@client)
  end # def bulk

  # Get a new BulkStream that feeds queued operations to this client.
  # See ElasticSearch::BulkStream#initialize for the arguments.
  public
  def bulkstream(queue_size=10, flush_interval=1)
    return ElasticSearch::BulkStream.new(self, queue_size, flush_interval)
  end # def bulkstream

  # Index a new document
  #
  # args:
  #   index: the index name
  #   type: the type name
  #   id: (optional) the id of the document
  #   data: (optional) the data for this document
  #   &block: (optional) optional block for using the DSL to add data
  #
  # Returns an ElasticSearch::IndexRequest instance.
  #
  # Example w/ DSL:
  #
  #     request = client.index("foo", "logs") do
  #       filename "/var/log/message"
  #       message "hello world"
  #       timestamp 123456
  #     end
  #
  #     request.execute!
  def index(index, type, id=nil, data={}, &block)
    # Permit 'id' being omitted entirely.
    # Thus a call like index("foo", "bar", somehash) is valid.
    if id.is_a?(Hash)
      data = id
      id = nil
    end

    indexreq = ElasticSearch::IndexRequest.new(@client, index, type, id, data)
    if block_given?
      indexreq.instance_eval(&block)
    end
    return indexreq
  end # def index

  # Search for data.
  # If a block is given, it is passed to SearchRequest#with so you can
  # more easily configure the search, like so:
  #
  #   search = client.search do
  #     index "foo"
  #     query("*")
  #     histogram("field", 1000)
  #   end
  #
  # The context of the block is of the SearchRequest object.
  #
  # Returns an ElasticSearch::SearchRequest instance.
  public
  def search(&block)
    searchreq = ElasticSearch::SearchRequest.new(@client)
    if block_given?
      searchreq.with(&block)
    end
    return searchreq
  end # def search

  # Returns the cluster admin client (@client.admin.cluster).
  def cluster
    return @client.admin.cluster
  end # def cluster

  # Returns the cluster admin client, exactly like #cluster.
  # NOTE(review): this looks like a copy of #cluster; the intent may have
  # been to return the node. Confirm before changing, callers may rely on it.
  def node
    return @client.admin.cluster
  end # def node
end # class ElasticSearch::Client
126
+
@@ -0,0 +1,47 @@
1
+ require "jruby-elasticsearch/namespace"
2
+ require "jruby-elasticsearch/request"
3
+
4
# A request to index a single document. The document data can be passed in
# as a hash and/or added field by field through the small DSL implemented
# in #method_missing.
class ElasticSearch::IndexRequest < ElasticSearch::Request
  # Create a new index request.
  #
  # args:
  #   client: the Java elasticsearch client; prepareIndex is called on it
  #   index: the index name
  #   type: the type name
  #   id: (optional) the id of the document; converted with to_s, as
  #       ElasticSearch::BulkRequest#index does
  #   data: (optional) hash holding the document; DSL calls add to it
  def initialize(client, index, type, id=nil, data={})
    @client = client
    @index = index
    @type = type
    @id = id
    @data = data

    # This should silence jruby warnings for 'multiple java methods for prepareIndex'
    if id.nil?
      @prep = @client.prepareIndex(index, type)
    else
      @prep = @client.prepareIndex(index, type, id.to_s)
    end
    super()
  end

  # Execute this index request.
  # This call is asynchronous.
  #
  # If a block is given, register it for both failure and success.
  def execute(&block)
    @prep.setSource(@data)
    use_callback(&block) if block_given?

    action = @prep.execute(@handler)
    return action
  end

  # Execute this index request synchronously
  def execute!
    @prep.setSource(@data)
    return @prep.execute.actionGet()
  end

  # DSL helper: an unknown method called with exactly one argument sets
  # that field on the document, e.g. `filename "/var/log/messages"`.
  #
  # Any other call shape (no argument, several arguments, or a block) is
  # passed to super and raises NoMethodError as usual. Previously every
  # unknown call was accepted, so a zero-argument call (a typo, or an
  # implicit conversion probe such as to_ary) added a "name" => nil field.
  #
  # Returns the value that was stored.
  #
  # TODO(sissel): Move this away to a DSL module.
  def method_missing(name, *args, &block)
    return super if args.length != 1 || block
    @data[name.to_s] = args.first
  end
end # class ElasticSearch::IndexRequest
@@ -0,0 +1,4 @@
1
+
2
# Namespace under which the library's classes are defined.
# Intentionally empty for now.
module ElasticSearch; end
@@ -0,0 +1,27 @@
1
+ require "jruby-elasticsearch/namespace"
2
+ require "jruby-elasticsearch/actionlistener"
3
+
4
# Common base for the request classes: owns the ActionListener that receives
# the outcome of an asynchronous execution.
class ElasticSearch::Request
  # Sets up the listener (@handler) on which callbacks are registered.
  def initialize
    @handler = ElasticSearch::ActionListener.new
  end

  # Register a callback block for an event on this request's listener.
  # See ElasticSearch::ActionListener#on
  #
  # Returns self, so calls can be chained.
  def on(event, &block)
    @handler.on(event, &block)
    self
  end

  # Register the same block for both :failure and :success.
  #
  # Does nothing (and returns nil) when no block is given; otherwise
  # returns self.
  def use_callback(&block)
    return unless block

    on(:failure, &block).on(:success, &block)
  end
end # class ElasticSearch::Request
@@ -0,0 +1,138 @@
1
+ require "jruby-elasticsearch/namespace"
2
+ require "jruby-elasticsearch/request"
3
+
4
# A search request. Its configuration methods are meant to be used as a
# small DSL, typically inside the block given to #with.
class ElasticSearch::SearchRequest < ElasticSearch::Request
  # Resolve the query builder class at load time; try the 'xcontent'
  # package first.
  QueryStringQueryBuilder =
    begin
      org.elasticsearch.index.query.xcontent.QueryStringQueryBuilder
    rescue NameError
      # The 'xcontent' namespace was removed in elasticsearch 0.17.0
      org.elasticsearch.index.query.QueryStringQueryBuilder
    end

  # Create a new search request.
  #
  # client: the Java elasticsearch client handed to the request builder.
  def initialize(client)
    @client = client
    @prep =
      begin
        # Try the 0.19 API
        org.elasticsearch.action.search.SearchRequestBuilder.new(@client)
      rescue NameError
        # Okay so maybe the pre-0.19 API works?
        org.elasticsearch.client.action.search.SearchRequestBuilder.new(@client)
      end
    @indeces = []
    super()
  end # def initialize

  # Evaluate the block in the context of this request, then return self.
  def with(&block)
    instance_eval(&block)
    self
  end # def with

  # Add an index name to search in. Can be invoked multiple times.
  def index(index_name)
    @indeces << index_name
  end # def index

  # Execute this search request.
  # This call is asynchronous.
  #
  # If a block is given, register it for both failure and success.
  #
  # On success, callback will receive a
  # org.elasticsearch.action.search.SearchResponse
  def execute(&block)
    use_callback(&block) if block_given?
    @prep.setIndices(@indeces.to_java(:String))
    @prep.execute(@handler)
  end # def execute

  # Execute this search request synchronously
  # Returns an org.elasticsearch.action.search.SearchResponse
  def execute!
    @prep.setIndices(@indeces.to_java(:String))
    @prep.execute.actionGet()
  end # def execute!

  # Sort the results by 'field'; 'order' must be :asc or :desc.
  # Anything else raises a RuntimeError. Returns self.
  def sort(field, order)
    direction =
      case order
      when :asc then org.elasticsearch.search.sort.SortOrder::ASC
      when :desc then org.elasticsearch.search.sort.SortOrder::DESC
      else raise "Invalid sort order '#{order.inspect}'"
      end
    @prep.addSort(field, direction)
    self
  end # def sort

  # Set the query from a query string.
  # 'default_operator' must be :and or :or; anything else raises a
  # RuntimeError. Returns self.
  def query(query_string, default_operator=:and)
    # TODO(sissel): allow doing other queries and such.
    string_query = QueryStringQueryBuilder.new(query_string)
    operators = QueryStringQueryBuilder::Operator

    case default_operator
    when :and then string_query.defaultOperator(operators::AND)
    when :or then string_query.defaultOperator(operators::OR)
    else raise "Unknown default operator '#{default_operator.inspect}'"
    end

    @prep.setQuery(string_query)
    self
  end # def query

  # Add a histogram facet to this query. Can be invoked multiple times.
  # When no name is given the facet is named "<field>_<interval>".
  # Returns self.
  def histogram(field, interval, name=nil)
    # TODO(sissel): How do we expose the name of the histogram?
    name = "#{field}_#{interval}" if name.nil?

    # TODO(sissel): Support 'global' ?
    facet = org.elasticsearch.search.facet.histogram.HistogramFacetBuilder.new(name)
    facet.field(field)
    facet.interval(interval)
    @prep.addFacet(facet)
    self
  end # def histogram

  # Add a terms facet to this query.
  # When no name is given the facet is named after the field.
  # Returns self.
  def terms(field, name=nil)
    # TODO(sissel): How do we expose the name of the facet?
    name = field if name.nil?

    # TODO(sissel): Support 'global' ?
    facet = org.elasticsearch.search.facet.terms.TermsFacetBuilder.new(name)
    facet.field(field)
    @prep.addFacet(facet)
    self
  end # def terms

  # Set the size of the request (passed to setSize). Returns self.
  def size(s)
    @prep.setSize(s)
    self
  end # def size
  alias count size
  alias limit size

  # Set the starting offset of the request (passed to setFrom).
  # Returns self.
  def from(from)
    @prep.setFrom(from)
    self
  end # def from
  alias offset from

end # class ElasticSearch::SearchRequest
@@ -0,0 +1,108 @@
1
+ require "test/unit"
2
+
3
# Integration tests. They need a real elasticsearch distribution:
# ELASTICSEARCH_HOME must point at a directory whose lib/ holds the jars.
class TestElasticSearch < Test::Unit::TestCase
  # Longest time, in seconds, to wait for an asynchronous callback.
  CALLBACK_TIMEOUT = 30

  def setup
    # Require all the elasticsearch libs
    raise "Please set ELASTICSEARCH_HOME" if ENV['ELASTICSEARCH_HOME'].nil?

    dir = File.join(ENV["ELASTICSEARCH_HOME"], "lib")
    if !File.directory?(dir)
      raise "ELASTICSEARCH_HOME set, but #{dir} doesn't exist"
    end

    Dir.glob(File.join(dir, "*.jar")).each do |jar|
      require jar
    end

    $:.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
    require "jruby-elasticsearch"

    # Start a local elasticsearch node
    builder = org.elasticsearch.node.NodeBuilder.nodeBuilder
    builder.local(true)
    @elasticsearch = builder.node
    @elasticsearch.start

    # Create a process-local client.
    @client = ElasticSearch::Client.new({:type => :local})
  end

  # Shut down the node started in setup so nodes do not pile up across
  # tests. The node created inside ElasticSearch::Client is not exposed by
  # that class, so it cannot be closed from here.
  def teardown
    @elasticsearch.close if @elasticsearch
  end

  def test_index_asynchronously
    data = { "fizzle" => "dazzle", "pants" => "off" }
    req = @client.index("twitter", "tweet", data)

    # Set up async callbacks. They only record the outcome; the assertions
    # run below on the test's own thread.
    done = false
    response = nil
    failure = nil
    req.on(:success) do |r|
      response = r
      done = true
    end.on(:failure) do |exception|
      failure = exception
      done = true
    end

    # Execute it, but do it asynchronously.
    req.execute

    wait_until_done { done }
    flunk("Index request failed: #{failure.inspect}") if failure
    assert_not_nil response
  end

  def test_bulk_index_asynchronously
    data = { "fizzle" => "dazzle", "pants" => "off" }
    bulk = @client.bulk
    bulk.index("twitter", "tweet1", data)
    bulk.index("twitter", "tweet2")

    # Set up async callbacks. They only record the outcome; the assertions
    # run below on the test's own thread.
    done = false
    response = nil
    failure = nil
    bulk.on(:success) do |r|
      response = r
      done = true
    end.on(:failure) do |exception|
      failure = exception
      done = true
    end

    # Execute it, but do it asynchronously.
    bulk.execute

    wait_until_done { done }
    flunk("Bulk request failed: #{failure.inspect}") if failure
    assert_not_nil response
  end

  def test_bulk_stream_synchronous
    stream = @client.bulkstream(10)
    tries = 10
    entries = 30
    1.upto(entries) do |i|
      stream.index("hello", "world", { "foo" => "bar", "i" => i })
    end
    stream.stop

    # The stream flushes in the background, so poll the search results a
    # few times before giving up.
    found = false
    1.upto(tries) do
      search = @client.search do
        index "hello"
        query "*"
      end
      # Results is an org.elasticsearch.action.search.SearchResponse
      results = search.execute!
      count = results.hits.totalHits

      if count == entries
        found = true
        break
      end
      sleep 0.2
    end # try a bunch to find our results

    assert(found, "Search results were not found.")
  end # def test_bulk_stream_synchronous

  private

  # Poll the given block until it returns true. Fails the test instead of
  # hanging forever when that does not happen within CALLBACK_TIMEOUT.
  def wait_until_done
    deadline = Time.now + CALLBACK_TIMEOUT
    until yield
      flunk("Timed out waiting for the callback") if Time.now > deadline
      sleep 0.1
    end
  end
end # class TestElasticSearch
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lusis-jruby-elasticsearch
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.13
6
+ platform: ruby
7
+ authors:
8
+ - Jordan Sissel
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-05-01 00:00:00 Z
14
+ dependencies: []
15
+
16
+ description: ...
17
+ email:
18
+ - jls@semicomplete.com
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - lib/jruby-elasticsearch.rb
27
+ - lib/jruby-elasticsearch/actionlistener.rb
28
+ - lib/jruby-elasticsearch/bulkrequest.rb
29
+ - lib/jruby-elasticsearch/bulkstream.rb
30
+ - lib/jruby-elasticsearch/client.rb
31
+ - lib/jruby-elasticsearch/indexrequest.rb
32
+ - lib/jruby-elasticsearch/namespace.rb
33
+ - lib/jruby-elasticsearch/request.rb
34
+ - lib/jruby-elasticsearch/searchrequest.rb
35
+ - test/test_integration.rb
36
+ homepage: https://github.com/jordansissel/jruby-elasticsearch
37
+ licenses:
38
+ - Apache License (2.0)
39
+ post_install_message:
40
+ rdoc_options: []
41
+
42
+ require_paths:
43
+ - lib
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ">="
49
+ - !ruby/object:Gem::Version
50
+ version: "0"
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ requirements: []
58
+
59
+ rubyforge_project:
60
+ rubygems_version: 1.8.15
61
+ signing_key:
62
+ specification_version: 3
63
+ summary: JRuby API for ElasticSearch using the native ES Java API
64
+ test_files: []
65
+