elasticsearch-transport 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +13 -0
  4. data/README.md +276 -0
  5. data/Rakefile +67 -0
  6. data/elasticsearch-transport.gemspec +52 -0
  7. data/lib/elasticsearch-transport.rb +1 -0
  8. data/lib/elasticsearch/transport.rb +29 -0
  9. data/lib/elasticsearch/transport/client.rb +123 -0
  10. data/lib/elasticsearch/transport/extensions/test_cluster.rb +163 -0
  11. data/lib/elasticsearch/transport/transport/base.rb +236 -0
  12. data/lib/elasticsearch/transport/transport/connections/collection.rb +93 -0
  13. data/lib/elasticsearch/transport/transport/connections/connection.rb +117 -0
  14. data/lib/elasticsearch/transport/transport/connections/selector.rb +63 -0
  15. data/lib/elasticsearch/transport/transport/errors.rb +73 -0
  16. data/lib/elasticsearch/transport/transport/http/curb.rb +70 -0
  17. data/lib/elasticsearch/transport/transport/http/faraday.rb +59 -0
  18. data/lib/elasticsearch/transport/transport/response.rb +20 -0
  19. data/lib/elasticsearch/transport/transport/serializer/multi_json.rb +36 -0
  20. data/lib/elasticsearch/transport/transport/sniffer.rb +46 -0
  21. data/lib/elasticsearch/transport/version.rb +5 -0
  22. data/test/integration/client_test.rb +117 -0
  23. data/test/integration/transport_test.rb +37 -0
  24. data/test/profile/client_benchmark_test.rb +107 -0
  25. data/test/test_extensions.rb +139 -0
  26. data/test/test_helper.rb +58 -0
  27. data/test/unit/client_test.rb +109 -0
  28. data/test/unit/connection_collection_test.rb +83 -0
  29. data/test/unit/connection_selector_test.rb +64 -0
  30. data/test/unit/connection_test.rb +90 -0
  31. data/test/unit/serializer_test.rb +16 -0
  32. data/test/unit/sniffer_test.rb +146 -0
  33. data/test/unit/transport_base_test.rb +402 -0
  34. data/test/unit/transport_curb_test.rb +59 -0
  35. data/test/unit/transport_faraday_test.rb +73 -0
  36. metadata +342 -0
@@ -0,0 +1,123 @@
1
+ module Elasticsearch
2
+ module Transport
3
+
4
+ # Handles communication with an Elasticsearch cluster.
5
+ #
6
+ # See {file:README.md README} for usage and code examples.
7
+ #
8
+ class Client
9
+ DEFAULT_TRANSPORT_CLASS = Transport::HTTP::Faraday
10
+
# Factory for the default logger.
#
# Calling the lambda loads the `logger` library on demand and returns a new
# Logger writing to STDERR, with each entry formatted as "<datetime>: <message>".
#
DEFAULT_LOGGER = lambda do
  require 'logger'
  Logger.new(STDERR).tap do |log|
    log.progname  = 'elasticsearch'
    log.formatter = proc { |_severity, datetime, _progname, msg| "#{datetime}: #{msg}\n" }
  end
end
18
+
# Factory for the default tracer.
#
# Calling the lambda loads the `logger` library on demand and returns a new
# Logger writing to STDERR which emits the bare message followed by a newline.
#
DEFAULT_TRACER = lambda do
  require 'logger'
  Logger.new(STDERR).tap do |log|
    log.progname  = 'elasticsearch.tracer'
    log.formatter = proc { |_severity, _datetime, _progname, msg| "#{msg}\n" }
  end
end
26
+
27
+ # Returns the transport object.
28
+ #
29
+ # @see Elasticsearch::Transport::Transport::Base
30
+ # @see Elasticsearch::Transport::Transport::HTTP::Faraday
31
+ #
32
+ attr_accessor :transport
33
+
# Create a client connected to an Elasticsearch cluster.
#
# Missing options are filled in on the passed `arguments` Hash itself, and that same
# Hash is handed to the transport as its `:options`.
#
# @option arguments [String,Array] :hosts Single host passed as a String or Hash, or multiple hosts
#                                         passed as an Array; `host` or `url` keys are also valid
#
# @option arguments [Boolean] :log   Use the default logger (disabled by default)
#
# @option arguments [Boolean] :trace Use the default tracer (disabled by default)
#
# @option arguments [Object] :logger An instance of a Logger-compatible object
#
# @option arguments [Object] :tracer An instance of a Logger-compatible object
#
# @option arguments [Number] :resurrect_after After how many seconds a dead connection should be tried again
#
# @option arguments [Boolean,Number] :reload_connections Reload connections after X requests (false by default)
#
# @option arguments [Boolean] :randomize_hosts Shuffle connections on initialization and reload (false by default)
#
# @option arguments [Integer] :sniffer_timeout Timeout for reloading connections in seconds (1 by default)
#
# @option arguments [Boolean,Number] :retry_on_failure Retry X times when request fails before raising an
#                                                      exception (false by default)
#
# @option arguments [Boolean] :reload_on_failure Reload connections after failure (false by default)
#
# @option arguments [Constant] :transport_class A specific transport class to use, will be initialized by
#                                               the client and passed hosts and all arguments
#
# @option arguments [Object] :transport A specific transport instance
#
# @option arguments [Constant] :serializer_class A specific serializer class to use, will be initialized by
#                                                the transport and passed the transport instance
#
# @option arguments [Constant] :selector An instance of selector strategy implemented with
#                                        {Elasticsearch::Transport::Transport::Connections::Selector::Base}.
#
def initialize(arguments={})
  hosts           = arguments[:hosts] || arguments[:host] || arguments[:url]
  transport_class = arguments[:transport_class] || DEFAULT_TRANSPORT_CLASS

  # An explicitly passed logger/tracer wins over the `:log` / `:trace` switches.
  arguments[:logger] ||= (DEFAULT_LOGGER.call if arguments[:log])
  arguments[:tracer] ||= (DEFAULT_TRACER.call if arguments[:trace])

  # Normalize unset (nil) switches to an explicit `false`.
  [:reload_connections, :retry_on_failure, :reload_on_failure, :randomize_hosts].each do |switch|
    arguments[switch] ||= false
  end

  # A ready-made transport instance takes precedence; otherwise build one from the hosts.
  @transport = arguments[:transport] ||
               transport_class.new(:hosts => __extract_hosts(hosts, arguments), :options => arguments)
end
85
+
# Performs a request by handing all arguments over, unchanged, to {#transport}.
#
# @return whatever the transport's `perform_request` returns
#
def perform_request(method, path, params={}, body=nil)
  transport.perform_request(method, path, params, body)
end
91
+
# Normalizes and returns hosts configuration.
#
# Wraps the `hosts_config` argument into an Array and extracts `host` and `port` info from
# strings. A single Hash is treated as one host (it is NOT converted to key/value pairs).
# Strings may carry a `protocol://` prefix and a trailing path; the prefix is returned
# under the `:protocol` key and the path is ignored.
# NOTE(review): whether the transport honors the `:protocol` key is not visible here -- confirm.
#
# Performs shuffling when the `randomize_hosts` option is set.
#
# @param hosts_config [nil,String,Hash,Array<String,Hash>] Falls back to `localhost` when nil
# @param options      [Hash] Only the `:randomize_hosts` key is read
#
# @return [Array<Hash>]
# @raise  [ArgumentError] When a host is neither a String nor a Hash
#
# @api private
#
def __extract_hosts(hosts_config=nil, options={})
  # `Array(hash)` would explode a Hash into [key, value] pairs, so wrap it explicitly.
  entries = case hosts_config
            when nil  then ['localhost']
            when Hash then [hosts_config]
            else           Array(hosts_config)
            end

  hosts = entries.map do |entry|
    case entry
    when String
      protocol, remainder = entry.include?('://') ? entry.split('://', 2) : [nil, entry]
      # Drop anything after the authority part ("host:port/path" => "host:port").
      authority  = remainder.to_s.split('/', 2).first.to_s
      name, port = authority.split(':')

      info = { :host => name, :port => port }
      info[:protocol] = protocol unless protocol.nil? || protocol.empty?
      info
    when Hash
      entry
    else
      raise ArgumentError, "Please pass host as a String or Hash, #{entry.class} given."
    end
  end

  hosts.shuffle! if options[:randomize_hosts]
  hosts
end
121
+ end
122
+ end
123
+ end
@@ -0,0 +1,163 @@
1
+ require 'ansi/code'
2
+
3
+ module Elasticsearch
4
+
5
+ # A convenience Ruby module for starting and stopping a separate testing cluster,
6
+ # to not depend on -- and not mess up -- <localhost:9200>.
7
+ #
8
+ module TestCluster
9
+ require 'timeout'
10
+ require 'net/http'
11
+ require 'uri'
12
+
13
+ @@number_of_nodes = 2
14
+ @@pids = []
15
+
# Start a cluster
#
# Starts the desired number of nodes in test-suitable configuration (memory store, no persistence, etc).
#
# Explicitly passed arguments take precedence over environment variables, which take
# precedence over the defaults. The `arguments` Hash is updated in place.
#
# Aborts the process when the Elasticsearch command cannot be found, and exits with
# status 0 (after waiting for "green") when a matching cluster is already running.
#
# @option arguments [String]  :command      Elasticsearch command
#                                           (env: `TEST_CLUSTER_COMMAND`, default: `elasticsearch`).
# @option arguments [Integer] :count        Number of desired nodes (default: 2).
# @option arguments [String]  :cluster_name Cluster name
#                                           (env: `TEST_CLUSTER_NAME`, default: `elasticsearch-ruby-test`).
# @option arguments [Integer] :port         Starting port number; will be auto-incremented
#                                           (env: `TEST_CLUSTER_PORT`, default: 9250).
#
def start(arguments={})
  arguments[:command] ||= ENV['TEST_CLUSTER_COMMAND'] || 'elasticsearch'

  unless system "which #{arguments[:command]} > /dev/null 2>&1"
    STDERR.puts ANSI.red("[ERROR] Elasticsearch can't be started, is it installed? Run: $ which elasticsearch"), ''
    abort
  end

  @@number_of_nodes = arguments[:count] if arguments[:count]

  arguments[:port]           = (arguments[:port] || ENV['TEST_CLUSTER_PORT'] || 9250).to_i
  arguments[:cluster_name] ||= ENV['TEST_CLUSTER_NAME'] || 'elasticsearch-ruby-test'
  arguments[:node_name]      = 'node'

  if running? :on => arguments[:port], :as => arguments[:cluster_name]
    print ANSI.red("Elasticsearch cluster already running")
    __wait_for_green(arguments[:port])
    exit(0)
  end

  print ANSI.faint("Starting ") + ANSI.ansi(@@number_of_nodes.to_s, :bold, :faint) + ANSI.faint(" Elasticsearch nodes")

  @@number_of_nodes.times do |n|
    n += 1 # nodes, pidfiles and ports are numbered from 1
    pidfile = File.expand_path("tmp/elasticsearch-#{n}.pid", Dir.pwd)
    # The trailing backslashes join the heredoc into a single shell command line.
    pid = Process.spawn <<-COMMAND
      #{arguments[:command]} \
        -D es.foreground=yes \
        -D es.cluster.name=#{arguments[:cluster_name]} \
        -D es.node.name=#{arguments[:node_name]}-#{n} \
        -D es.http.port=#{arguments[:port].to_i + (n-1)} \
        -D es.gateway.type=none \
        -D es.index.store.type=memory \
        -D es.network.host=0.0.0.0 \
        -D es.discovery.zen.ping.multicast.enabled=true \
        -D es.pidfile=#{pidfile} \
        > /dev/null 2>&1
    COMMAND
    Process.detach pid
  end

  __wait_for_green(arguments[:port])
end
70
+
# Stop the cluster.
#
# Reads the PID number from every pidfile in `$CWD/tmp`, sends `KILL` to each distinct
# PID once, and removes the pidfiles. Pidfiles which do not contain a positive number
# are reported and removed without signalling anything (PID 0 would address the whole
# process group of the caller).
#
# @return [nil]
#
def stop
  pidfiles = __get_pidfiles
  return if pidfiles.empty?

  print "Stopping Elasticsearch nodes... "
  signalled = []

  pidfiles.each do |pidfile|
    pid = File.read(pidfile).to_i

    if pid <= 0
      print ANSI.red("[ArgumentError] Invalid PID in #{pidfile}. ")
    elsif !signalled.include?(pid)
      signalled << pid
      begin
        print ANSI.green("stopped PID #{pid}. ") if Process.kill 'KILL', pid
      rescue StandardError => e
        print ANSI.red("[#{e.class}] PID #{pid} not found. ")
      end
    end

    File.delete pidfile if File.exist?(pidfile)
  end

  puts
end
92
+
# Returns true when a specific test node is running.
#
# The cluster health is requested with a 0.25 second timeout; any error while
# fetching it counts as "not running". Both the cluster name and the configured
# number of nodes have to match.
#
# @option arguments [Integer] :on The port on which the node is running (default: 9250).
# @option arguments [String]  :as The cluster name (default: `elasticsearch-ruby-test`).
#
# @return [Boolean]
#
def running?(arguments={})
  port         = arguments[:on] || 9250
  cluster_name = arguments[:as] || 'elasticsearch-ruby-test'

  health = begin
    Timeout::timeout(0.25) { __get_cluster_health(port) }
  rescue StandardError
    nil
  end
  return false unless health

  health['cluster_name'] == cluster_name && health['number_of_nodes'] == @@number_of_nodes
end
108
+
# Blocks the process and waits for the cluster to be in a "green" state
# with the expected number of nodes, for 30 seconds at most.
#
# Polls the cluster health once per second, printing a dot for every attempt,
# and prints information about the cluster on STDOUT once it is ready.
# Failed HTTP requests are ignored and retried.
#
# @param port [Integer] The port to ask for cluster health
#
def __wait_for_green(port=9250)
  health_uri = URI("http://localhost:#{port}/_cluster/health")

  Timeout::timeout(30) do
    loop do
      payload = begin
        Net::HTTP.get(health_uri)
      rescue StandardError
        nil
      end

      if payload
        pids   = __get_pids
        health = MultiJson.load(payload)
        ready  = health['status'] == 'green' && health['number_of_nodes'].to_i == @@number_of_nodes

        if ready
          separator = ANSI.faint('-'*80)
          summary   = [
            'Cluster: '.ljust(20)         + health['cluster_name'].to_s,
            'Status: '.ljust(20)          + health['status'].to_s,
            'Number of nodes: '.ljust(20) + health['number_of_nodes'].to_s,
            'PIDs'.ljust(20)              + pids.inspect
          ].join("\n")

          puts '', separator, ANSI.faint(summary), separator
          break
        end
      end

      print ANSI.faint('.')
      sleep 1
    end
  end
end
140
+
# Tries to load cluster health information from `localhost` on the given port.
#
# @param port [Integer]
#
# @return [Object,nil] The response body parsed by MultiJson, or nil when the HTTP request failed
#
def __get_cluster_health(port=9250)
  health_uri = URI("http://localhost:#{port}/_cluster/health")

  body = begin
    Net::HTTP.get(health_uri)
  rescue StandardError
    nil
  end

  MultiJson.load(body) if body
end
149
+
# Returns a collection of distinct PID numbers read from the pidfiles.
#
# @return [Array<Integer>]
#
def __get_pids
  __get_pidfiles.map { |path| File.read(path) }.map(&:to_i).uniq
end
154
+
# Returns a collection of files with PID information, i.e. the paths
# matching `tmp/elasticsearch-*.pid` below the current working directory.
#
# @return [Array<String>]
#
def __get_pidfiles
  pattern = File.join(Dir.pwd, 'tmp', 'elasticsearch-*.pid')
  Dir.glob(pattern)
end
160
+
161
+ extend self
162
+ end
163
+ end
@@ -0,0 +1,236 @@
1
+ module Elasticsearch
2
+ module Transport
3
+ module Transport
4
+
5
+ # @abstract Module with common functionality for transport implementations.
6
+ #
7
+ module Base
8
+ DEFAULT_PORT = 9200
9
+ DEFAULT_PROTOCOL = 'http'
10
+ DEFAULT_RELOAD_AFTER = 10_000 # Requests
11
+ DEFAULT_RESURRECT_AFTER = 60 # Seconds
12
+ DEFAULT_MAX_TRIES = 3 # Requests
13
+ DEFAULT_SERIALIZER_CLASS = Serializer::MultiJson
14
+
15
+ attr_reader :hosts, :options, :connections, :counter, :last_request_at, :protocol
16
+ attr_accessor :serializer, :sniffer, :logger, :tracer, :reload_after, :resurrect_after, :max_tries
17
+
# Creates a new transport object.
#
# Builds the connection collection via {#__build_connections}, then sets up the
# serializer, sniffer, logger/tracer and the reload/resurrect/retry settings
# from the passed options.
#
# @param arguments [Hash] Settings and options for the transport
# @param block     [Proc] Lambda or Proc which can be evaluated in the context of the "session" object
#
# @option arguments [Array] :hosts   An Array of normalized hosts information
# @option arguments [Hash]  :options A Hash with options (usually passed by {Client})
#
# @see Client#initialize
#
def initialize(arguments={}, &block)
  @hosts       = arguments[:hosts]   || []
  @options     = arguments[:options] || {}
  @block       = block
  @connections = __build_connections

  # A ready-made serializer instance wins over a serializer class.
  @serializer  = options[:serializer] || (options[:serializer_class] || DEFAULT_SERIALIZER_CLASS).new(self)
  @protocol    = options[:protocol] || DEFAULT_PROTOCOL

  @logger      = options[:logger]
  @tracer      = options[:tracer]

  @sniffer     = (options[:sniffer_class] || Sniffer).new(self)
  @counter     = 0
  @last_request_at = Time.now

  # `Integer` instead of `Fixnum`: the `Fixnum` constant no longer exists in current Rubies.
  # Non-numeric values (eg. `true`) select the defaults.
  @reload_after    = options[:reload_connections].is_a?(Integer) ? options[:reload_connections] : DEFAULT_RELOAD_AFTER
  @resurrect_after = options[:resurrect_after] || DEFAULT_RESURRECT_AFTER
  @max_tries       = options[:retry_on_failure].is_a?(Integer) ? options[:retry_on_failure] : DEFAULT_MAX_TRIES
end
47
+
# Returns a connection from the connection pool by delegating to {Connections::Collection#get_connection}.
#
# Resurrects dead connections first when more than `resurrect_after` seconds have passed
# since the last request. Increments the counter and, if the `reload_connections` option
# is set, reloads the connections whenever the counter is a multiple of `reload_after`.
#
# @return [Connections::Connection]
# @see    Connections::Collection#get_connection
#
def get_connection(options={})
  resurrection_due_at = @last_request_at + @resurrect_after
  resurrect_dead_connections! if Time.now > resurrection_due_at

  connection = connections.get_connection(options)
  @counter  += 1

  reload_due = @options[:reload_connections] && (counter % reload_after).zero?
  reload_connections! if reload_due

  connection
end
65
+
# Reloads and replaces the connection collection based on cluster information.
#
# A sniffer timeout is logged (when a logger is set) and otherwise ignored,
# leaving the current connections in place.
#
# @return [self]
# @see    Sniffer#hosts
#
def reload_connections!
  begin
    __rebuild_connections(:hosts => sniffer.hosts, :options => options)
  rescue SnifferTimeoutError
    logger.error "[SnifferTimeoutError] Timeout when reloading connections." if logger
  end

  self
end
78
+
# Tries to "resurrect" all eligible dead connections by calling
# `resurrect!` on every connection reported as dead by the collection.
#
# @see Connections::Connection#resurrect!
#
def resurrect_dead_connections!
  connections.dead.each(&:resurrect!)
end
86
+
# Replaces the hosts, the options and -- via {#__build_connections} -- the connections collection.
#
# @option arguments [Array] :hosts   New hosts information (default: empty Array)
# @option arguments [Hash]  :options New options (default: empty Hash)
#
# @return The newly built connections collection
#
# @api private
#
def __rebuild_connections(arguments={})
  @hosts, @options = (arguments[:hosts] || []), (arguments[:options] || {})
  @connections     = __build_connections
end
96
+
# Log request and response information.
#
# Writes one `info` line with the method, URL, response status and timings,
# and `debug` lines with the request body (if any) and the response body.
#
# @api private
#
def __log(method, path, params, body, url, response, json, took, duration)
  verb    = method.to_s.upcase
  elapsed = sprintf('%.3fs', duration)

  logger.info  "#{verb} #{url} [status:#{response.status}, request:#{elapsed}, query:#{took}]"
  logger.debug "> #{__convert_to_json(body)}" if body
  logger.debug "< #{response.body}"
end
107
+
# Log failed request: writes the response status and body at the `fatal` level.
#
# @api private
#
def __log_failed(response)
  logger.fatal("[#{response.status}] #{response.body}")
end
114
+
# Trace the request in the `curl` format.
#
# The traced URL always points to `http://localhost:9200`, regardless of the URL
# actually requested. The response is written as `#`-prefixed comment lines at the
# `debug` level: the parsed JSON (pretty-printed) when available, the raw body otherwise.
#
# @api private
#
def __trace(method, path, params, body, url, response, json, took, duration)
  query     = params.empty? ? '' : "&#{::Faraday::Utils::ParamsHash[params].to_query}"
  trace_url = "http://localhost:9200/#{path}?pretty#{query}"
  payload   = body ? " -d '#{__convert_to_json(body, :pretty => true)}'" : ''

  tracer.info  "curl -X #{method.to_s.upcase} '#{trace_url}'#{payload}\n"
  tracer.debug "# #{Time.now.iso8601} [#{response.status}] (#{format('%.3f', duration)}s)\n#"

  commented = if json
    # Prefix every line with "# " and move the closing brace onto its own line.
    serializer.dump(json, :pretty => true).gsub(/^/, '# ').sub(/\}$/, "\n# }") + "\n"
  else
    "# #{response.body}\n"
  end
  tracer.debug commented
end
126
+
# Raise error specific for the HTTP response status or a generic server error.
#
# The error class is looked up in `ERRORS` by the response status, with `ServerError`
# as the fallback; the message contains the status and the response body.
#
# @api private
#
def __raise_transport_error(response)
  status      = response.status
  error_class = ERRORS[status] || ServerError

  raise error_class, "[#{status}] #{response.body}"
end
134
+
# Converts any non-String object to JSON with the {#serializer};
# Strings are returned as they are.
#
# @api private
#
def __convert_to_json(o=nil, options={})
  return o if o.is_a?(String)

  serializer.dump(o, options)
end
141
+
# Performs a request to Elasticsearch, while handling logging, tracing, marking dead connections,
# retrying the request and reloading the connections.
#
# @abstract The transport implementation has to implement this method either in full,
#           or by invoking this method with a block. See {HTTP::Faraday#perform_request} for an example.
#
# @param method [String] Request method
# @param path   [String] The API endpoint
# @param params [Hash]   Request parameters (will be serialized by {Connections::Connection#full_url})
# @param body   [Hash]   Request body (will be serialized by the {#serializer})
# @param block  [Proc]   Code block to evaluate, passed from the implementation;
#                        called with the connection and the full URL
#
# @return [Response]
# @raise  [NoMethodError] If no block is passed
# @raise  [ServerError]   If request failed on server
# @raise  [Error]         If no connection is available
#
def perform_request(method, path, params={}, body=nil, &block)
  raise NoMethodError, "Implement this method in your transport class" unless block_given?
  start = Time.now if logger || tracer
  tries = 0

  begin
    tries     += 1
    connection = get_connection
    raise Error, "Cannot get new connection from pool." unless connection

    url      = connection.full_url(path, params)
    response = block.call(connection, url)

    connection.healthy! if connection.failures > 0

  rescue *host_unreachable_exceptions => e
    # `connection` is nil when the failure happened before a connection was obtained.
    host_info = connection ? connection.host.inspect : 'n/a'
    logger.error "[#{e.class}] #{e.message} #{host_info}" if logger

    connection.dead! if connection

    if @options[:reload_on_failure] && tries < connections.all.size
      logger.warn "[#{e.class}] Reloading connections (attempt #{tries} of #{connections.all.size})" if logger
      retry if reload_connections!
    end

    if @options[:retry_on_failure]
      logger.warn "[#{e.class}] Attempt #{tries} connecting to #{host_info}" if logger
      if tries < max_tries
        retry
      else
        logger.fatal "[#{e.class}] Cannot connect to #{host_info} after #{tries} tries" if logger
        raise e
      end
    else
      raise e
    end

  rescue Exception => e
    # Log-and-re-raise only; the guard keeps a missing connection from masking
    # the original exception with a NoMethodError.
    host_info = connection ? connection.host.inspect : 'n/a'
    logger.fatal "[#{e.class}] #{e.message} (#{host_info})" if logger
    raise e
  end

  # Only bodies with a line starting with "{" are parsed as JSON.
  json     = serializer.load(response.body) if response.body.to_s =~ /^\{/
  took     = (json['took'] ? sprintf('%.3fs', json['took']/1000.0) : 'n/a') rescue 'n/a' if logger || tracer
  duration = Time.now-start if logger || tracer

  __log   method, path, params, body, url, response, json, took, duration if logger
  __trace method, path, params, body, url, response, json, took, duration if tracer

  if response.status.to_i >= 300
    __log_failed response if logger
    __raise_transport_error response
  else
    Response.new response.status, json || response.body, response.headers
  end
ensure
  @last_request_at = Time.now
end
215
+
# @abstract Returns an Array of connection errors specific to the transport implementation.
#           This default lists `Errno::ECONNREFUSED` only.
#           See {HTTP::Faraday#host_unreachable_exceptions} for an example.
#
# @return [Array] Exception classes rescued as "host unreachable" by {#perform_request}
#
def host_unreachable_exceptions
  [ Errno::ECONNREFUSED ]
end
224
+
# @abstract A transport implementation must implement this method;
#           this default always raises.
#           See {HTTP::Faraday#__build_connections} for an example.
#
# @return [Connections::Collection]
# @raise  [NoMethodError] Unless overridden by the transport implementation
# @api    private
#
def __build_connections
  raise(NoMethodError, 'Implement this method in your class')
end
233
+ end
234
+ end
235
+ end
236
+ end