elasticsearch-transport 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE.txt +13 -0
  4. data/README.md +276 -0
  5. data/Rakefile +67 -0
  6. data/elasticsearch-transport.gemspec +52 -0
  7. data/lib/elasticsearch-transport.rb +1 -0
  8. data/lib/elasticsearch/transport.rb +29 -0
  9. data/lib/elasticsearch/transport/client.rb +123 -0
  10. data/lib/elasticsearch/transport/extensions/test_cluster.rb +163 -0
  11. data/lib/elasticsearch/transport/transport/base.rb +236 -0
  12. data/lib/elasticsearch/transport/transport/connections/collection.rb +93 -0
  13. data/lib/elasticsearch/transport/transport/connections/connection.rb +117 -0
  14. data/lib/elasticsearch/transport/transport/connections/selector.rb +63 -0
  15. data/lib/elasticsearch/transport/transport/errors.rb +73 -0
  16. data/lib/elasticsearch/transport/transport/http/curb.rb +70 -0
  17. data/lib/elasticsearch/transport/transport/http/faraday.rb +59 -0
  18. data/lib/elasticsearch/transport/transport/response.rb +20 -0
  19. data/lib/elasticsearch/transport/transport/serializer/multi_json.rb +36 -0
  20. data/lib/elasticsearch/transport/transport/sniffer.rb +46 -0
  21. data/lib/elasticsearch/transport/version.rb +5 -0
  22. data/test/integration/client_test.rb +117 -0
  23. data/test/integration/transport_test.rb +37 -0
  24. data/test/profile/client_benchmark_test.rb +107 -0
  25. data/test/test_extensions.rb +139 -0
  26. data/test/test_helper.rb +58 -0
  27. data/test/unit/client_test.rb +109 -0
  28. data/test/unit/connection_collection_test.rb +83 -0
  29. data/test/unit/connection_selector_test.rb +64 -0
  30. data/test/unit/connection_test.rb +90 -0
  31. data/test/unit/serializer_test.rb +16 -0
  32. data/test/unit/sniffer_test.rb +146 -0
  33. data/test/unit/transport_base_test.rb +402 -0
  34. data/test/unit/transport_curb_test.rb +59 -0
  35. data/test/unit/transport_faraday_test.rb +73 -0
  36. metadata +342 -0
@@ -0,0 +1,123 @@
module Elasticsearch
  module Transport

    # Handles communication with an Elasticsearch cluster.
    #
    # The client itself only normalizes its configuration and builds (or accepts)
    # a transport object; every request is delegated to that transport.
    #
    # See {file:README.md README} for usage and code examples.
    #
    class Client
      DEFAULT_TRANSPORT_CLASS = Transport::HTTP::Faraday

      # Builds the logger used when the `:log` option is set and no `:logger` is passed.
      # It writes to STDERR and prefixes each message with a timestamp.
      # `logger` is required lazily, only when the lambda is called.
      #
      DEFAULT_LOGGER = lambda do
        require 'logger'
        logger = Logger.new(STDERR)
        logger.progname = 'elasticsearch'
        logger.formatter = proc { |severity, datetime, progname, msg| "#{datetime}: #{msg}\n" }
        logger
      end

      # Builds the tracer used when the `:trace` option is set and no `:tracer` is passed.
      # It writes to STDERR and prints the bare message only.
      #
      DEFAULT_TRACER = lambda do
        require 'logger'
        logger = Logger.new(STDERR)
        logger.progname = 'elasticsearch.tracer'
        logger.formatter = proc { |severity, datetime, progname, msg| "#{msg}\n" }
        logger
      end

      # Returns the transport object.
      #
      # @see Elasticsearch::Transport::Transport::Base
      # @see Elasticsearch::Transport::Transport::HTTP::Faraday
      #
      attr_accessor :transport

      # Create a client connected to an Elasticsearch cluster.
      #
      # The passed Hash is copied before defaults are filled in, so the caller's
      # object is never modified.
      #
      # @option arguments [String,Hash,Array] :hosts Single host passed as a String or Hash, or multiple hosts
      #                                              passed as an Array; `host` or `url` keys are also valid
      #
      # @option arguments [Boolean] :log Use the default logger (disabled by default)
      #
      # @option arguments [Boolean] :trace Use the default tracer (disabled by default)
      #
      # @option arguments [Object] :logger An instance of a Logger-compatible object
      #
      # @option arguments [Object] :tracer An instance of a Logger-compatible object
      #
      # @option arguments [Number] :resurrect_after After how many seconds a dead connection should be tried again
      #
      # @option arguments [Boolean,Number] :reload_connections Reload connections after X requests (false by default)
      #
      # @option arguments [Boolean] :randomize_hosts Shuffle connections on initialization and reload (false by default)
      #
      # @option arguments [Integer] :sniffer_timeout Timeout for reloading connections in seconds (1 by default)
      #
      # @option arguments [Boolean,Number] :retry_on_failure Retry X times when request fails before raising an
      #                                                      exception (false by default)
      #
      # @option arguments [Boolean] :reload_on_failure Reload connections after failure (false by default)
      #
      # @option arguments [Constant] :transport_class A specific transport class to use, will be initialized by
      #                                               the client and passed hosts and all arguments
      #
      # @option arguments [Object] :transport A specific transport instance
      #
      # @option arguments [Constant] :serializer_class A specific serializer class to use, will be initialized by
      #                                                the transport and passed the transport instance
      #
      # @option arguments [Constant] :selector An instance of selector strategy implemented with
      #                                        {Elasticsearch::Transport::Transport::Connections::Selector::Base}.
      #
      def initialize(arguments={})
        # Work on a shallow copy: the defaults below must not leak into the caller's Hash.
        arguments       = arguments.dup
        transport_class = arguments[:transport_class] || DEFAULT_TRANSPORT_CLASS
        hosts           = arguments[:hosts] || arguments[:host] || arguments[:url]

        arguments[:logger] ||= arguments[:log]   ? DEFAULT_LOGGER.call() : nil
        arguments[:tracer] ||= arguments[:trace] ? DEFAULT_TRACER.call() : nil
        arguments[:reload_connections] ||= false
        arguments[:retry_on_failure]   ||= false
        arguments[:reload_on_failure]  ||= false
        arguments[:randomize_hosts]    ||= false

        # An explicitly passed transport instance wins over building one from `transport_class`.
        @transport = arguments[:transport] || \
                     transport_class.new(:hosts => __extract_hosts(hosts, arguments), :options => arguments)
      end

      # Performs a request through delegation to {#transport}.
      #
      def perform_request(method, path, params={}, body=nil)
        transport.perform_request method, path, params, body
      end

      # Normalizes and returns hosts configuration.
      #
      # Arrayifies the `hosts_config` argument and extracts `host` and `port` info from strings.
      # A single Hash is wrapped as-is (`Array()` would otherwise explode it into key/value pairs).
      # Strings may be given either as `host[:port]` or as a URL (`protocol://host[:port]`);
      # for URLs the scheme is returned under the `:protocol` key, and any credentials or path
      # are ignored.
      # Performs shuffling when the `randomize_hosts` option is set.
      #
      # @param hosts_config [nil,String,Hash,Array] Hosts as passed to the client (`localhost` when nil)
      # @param options      [Hash]                  Client options; only `:randomize_hosts` is read
      #
      # @return [Array<Hash>]
      # @raise  [ArgumentError] When a host is neither a String nor a Hash
      #
      # @api private
      #
      def __extract_hosts(hosts_config=nil, options={})
        hosts_config = case hosts_config
          when nil  then ['localhost']
          when Hash then [hosts_config]
          else           Array(hosts_config)
        end

        hosts = hosts_config.map do |host|
          case host
          when String
            url = %r{\A([a-z][a-z0-9+.\-]*)://([^/?#]*)}i.match(host)
            if url
              # Drop the `user:password@` part, if any, before looking for the port.
              name, port = url[2].sub(/\A.*@/, '').split(':')
              { :protocol => url[1], :host => name, :port => port }
            else
              name, port = host.split(':')
              { :host => name, :port => port }
            end
          when Hash
            host
          else
            raise ArgumentError, "Please pass host as a String or Hash, #{host.class} given."
          end
        end

        hosts.shuffle! if options[:randomize_hosts]
        hosts
      end
    end
  end
end
@@ -0,0 +1,163 @@
1
+ require 'ansi/code'
2
+
module Elasticsearch

  # A convenience Ruby module for starting and stopping a separate testing cluster,
  # to not depend on -- and not mess up -- <localhost:9200>.
  #
  # All methods are available directly on the module (`Elasticsearch::TestCluster.start`).
  # The `ANSI` and `MultiJson` constants are expected to be loaded by the surrounding code.
  #
  module TestCluster
    require 'timeout'
    require 'net/http'
    require 'uri'

    @@number_of_nodes = 2
    @@pids            = []

    # Start a cluster
    #
    # Starts the desired number of nodes in test-suitable configuration (memory store, no persistence, etc).
    # Aborts the process when the Elasticsearch command cannot be found, and exits with status 0
    # (after waiting for "green" status) when a matching cluster is already running.
    #
    # @option arguments [String]  :command      Elasticsearch command (default: `elasticsearch`).
    # @option arguments [Integer] :count        Number of desired nodes (default: 2).
    # @option arguments [String]  :cluster_name Cluster name (default: `elasticsearch-ruby-test`).
    # @option arguments [String]  :port         Starting port number; will be auto-incremented (default: 9250).
    #
    # You can also use environment variables to set these options
    # (`TEST_CLUSTER_COMMAND`, `TEST_CLUSTER_PORT`, `TEST_CLUSTER_NAME`);
    # an explicitly passed argument takes precedence over the environment.
    #
    def start(arguments={})
      arguments[:command] ||= ENV['TEST_CLUSTER_COMMAND'] || 'elasticsearch'

      unless system "which #{arguments[:command]} > /dev/null 2>&1"
        STDERR.puts ANSI.red("[ERROR] Elasticsearch can't be started, is it installed? Run: $ which elasticsearch"), ''
        abort
      end

      @@number_of_nodes = arguments[:count] if arguments[:count]

      arguments[:port]           = (arguments[:port] || ENV['TEST_CLUSTER_PORT'] || 9250).to_i
      arguments[:cluster_name] ||= ENV['TEST_CLUSTER_NAME'] || 'elasticsearch-ruby-test'
      arguments[:node_name]      = 'node'

      if running? :on => arguments[:port], :as => arguments[:cluster_name]
        print ANSI.red("Elasticsearch cluster already running")
        __wait_for_green(arguments[:port])
        exit(0)
      end

      print ANSI.faint("Starting ") + ANSI.ansi(@@number_of_nodes.to_s, :bold, :faint) + ANSI.faint(" Elasticsearch nodes")

      @@number_of_nodes.times do |n|
        n += 1
        pidfile = File.expand_path("tmp/elasticsearch-#{n}.pid", Dir.pwd)
        # The trailing backslashes join the heredoc into a single shell command line;
        # each node gets its own name, HTTP port (starting port + index) and pidfile.
        pid = Process.spawn <<-COMMAND
          #{arguments[:command]} \
            -D es.foreground=yes \
            -D es.cluster.name=#{arguments[:cluster_name]} \
            -D es.node.name=#{arguments[:node_name]}-#{n} \
            -D es.http.port=#{arguments[:port].to_i + (n-1)} \
            -D es.gateway.type=none \
            -D es.index.store.type=memory \
            -D es.network.host=0.0.0.0 \
            -D es.discovery.zen.ping.multicast.enabled=true \
            -D es.pidfile=#{pidfile} \
            > /dev/null 2>&1
        COMMAND
        Process.detach pid
      end

      __wait_for_green(arguments[:port])
    end

    # Stop the cluster.
    #
    # Gets the PID numbers from pidfiles in `$CWD/tmp`, sends `KILL` to each distinct PID
    # and removes every pidfile. Does nothing when no pidfile is found.
    #
    # @return [nil]
    #
    def stop
      pidfiles = __get_pidfiles
      return if pidfiles.empty?

      print "Stopping Elasticsearch nodes... "
      signalled = []

      # Iterate over the files (not a de-duplicated PID list) so that each PID
      # stays paired with the file it was read from.
      pidfiles.each do |pidfile|
        pid = File.read(pidfile).to_i

        # An empty or corrupt pidfile reads as 0; `kill 0` would signal our own process group.
        if pid > 0 && !signalled.include?(pid)
          signalled << pid
          begin
            print ANSI.green("stopped PID #{pid}. ") if Process.kill 'KILL', pid
          rescue StandardError => e
            print ANSI.red("[#{e.class}] PID #{pid} not found. ")
          end
        end

        File.delete pidfile if File.exist?(pidfile)
      end
      puts
    end

    # Returns true when a specific test node is running.
    #
    # The node counts as running when the cluster health endpoint answers within 0.25 seconds
    # and reports both the expected cluster name and the expected number of nodes.
    #
    # @option arguments [Integer] :on The port on which the node is running.
    # @option arguments [String]  :as The cluster name.
    #
    # @return [Boolean]
    #
    def running?(arguments={})
      port         = arguments[:on] || 9250
      cluster_name = arguments[:as] || 'elasticsearch-ruby-test'

      cluster_health = begin
        Timeout::timeout(0.25) { __get_cluster_health(port) }
      rescue StandardError
        nil
      end

      return false unless cluster_health

      cluster_health['cluster_name'] == cluster_name && \
        cluster_health['number_of_nodes'] == @@number_of_nodes
    end

    # Blocks the process and waits for the cluster to be in a "green" state.
    # Prints information about the cluster on STDOUT.
    #
    # Polls the health endpoint once per second, printing a dot for every attempt;
    # the surrounding 30 second timeout raises when the cluster does not turn green in time.
    #
    def __wait_for_green(port=9250)
      uri = URI("http://localhost:#{port}/_cluster/health")

      Timeout::timeout(30) do
        loop do
          response = begin
            Net::HTTP.get(uri)
          rescue StandardError
            nil # the node is most likely not accepting connections yet
          end

          if response
            pids = __get_pids
            json = MultiJson.load(response)

            if json['status'] == 'green' && json['number_of_nodes'].to_i == @@number_of_nodes
              puts '',
                   ANSI.faint('-'*80),
                   ANSI.faint(
                     'Cluster: '.ljust(20)         + json['cluster_name'].to_s    + "\n" +
                     'Status: '.ljust(20)          + json['status'].to_s          + "\n" +
                     'Number of nodes: '.ljust(20) + json['number_of_nodes'].to_s + "\n" +
                     'PIDs'.ljust(20)              + pids.inspect
                   ),
                   ANSI.faint('-'*80)
              break
            end
          end

          print ANSI.faint('.')
          sleep 1
        end
      end
    end

    # Tries to load cluster health information
    #
    # @return [Object,nil] The parsed health response, or nil when the request failed
    #
    def __get_cluster_health(port=9250)
      uri = URI("http://localhost:#{port}/_cluster/health")

      response = begin
        Net::HTTP.get(uri)
      rescue StandardError
        nil
      end

      MultiJson.load(response) if response
    end

    # Returns a collection of PID numbers from pidfiles.
    #
    # @return [Array<Integer>] Distinct PIDs (a file without a number reads as 0)
    #
    def __get_pids
      __get_pidfiles.map { |pidfile| File.read(pidfile).to_i }.uniq
    end

    # Returns a collection of files with PID information.
    #
    # @return [Array<String>] Absolute paths matching `tmp/elasticsearch-*.pid` under the current directory
    #
    def __get_pidfiles
      Dir[File.expand_path('tmp/elasticsearch-*.pid', Dir.pwd)]
    end

    extend self
  end
end
@@ -0,0 +1,236 @@
require 'time' # Time#iso8601, used by the tracer output

module Elasticsearch
  module Transport
    module Transport

      # @abstract Module with common functionality for transport implementations.
      #
      # An implementation includes this module, provides {#__build_connections} and
      # calls {#perform_request} with a block which performs the actual HTTP request.
      #
      module Base
        DEFAULT_PORT             = 9200
        DEFAULT_PROTOCOL         = 'http'
        DEFAULT_RELOAD_AFTER     = 10_000 # Requests
        DEFAULT_RESURRECT_AFTER  = 60     # Seconds
        DEFAULT_MAX_TRIES        = 3      # Requests
        DEFAULT_SERIALIZER_CLASS = Serializer::MultiJson

        attr_reader   :hosts, :options, :connections, :counter, :last_request_at, :protocol
        attr_accessor :serializer, :sniffer, :logger, :tracer, :reload_after, :resurrect_after, :max_tries

        # Creates a new transport object.
        #
        # @param arguments [Hash] Settings and options for the transport
        # @param block     [Proc] Lambda or Proc which can be evaluated in the context of the "session" object
        #
        # @option arguments [Array] :hosts   An Array of normalized hosts information
        # @option arguments [Hash]  :options A Hash with options (usually passed by {Client})
        #
        # @see Client#initialize
        #
        def initialize(arguments={}, &block)
          @hosts       = arguments[:hosts]   || []
          @options     = arguments[:options] || {}
          @block       = block
          @connections = __build_connections

          @serializer  = options[:serializer] ||
                         ( options[:serializer_class] ? options[:serializer_class].new(self) : DEFAULT_SERIALIZER_CLASS.new(self) )
          @protocol    = options[:protocol] || DEFAULT_PROTOCOL

          @logger      = options[:logger]
          @tracer      = options[:tracer]

          @sniffer     = options[:sniffer_class] ? options[:sniffer_class].new(self) : Sniffer.new(self)
          @counter     = 0
          @last_request_at = Time.now

          # `:reload_connections` and `:retry_on_failure` are either booleans or a number;
          # only a number overrides the default. (`Integer`, not the removed `Fixnum`.)
          @reload_after    = options[:reload_connections].is_a?(Integer) ? options[:reload_connections] : DEFAULT_RELOAD_AFTER
          @resurrect_after = options[:resurrect_after] || DEFAULT_RESURRECT_AFTER
          @max_tries       = options[:retry_on_failure].is_a?(Integer) ? options[:retry_on_failure] : DEFAULT_MAX_TRIES
        end

        # Returns a connection from the connection pool by delegating to {Connections::Collection#get_connection}.
        #
        # Resurrects dead connection if the `resurrect_after` timeout has passed.
        # Increments the counter and performs connection reloading if the `reload_connections` option is set.
        #
        # @return [Connections::Connection]
        # @see    Connections::Collection#get_connection
        #
        def get_connection(options={})
          resurrect_dead_connections! if Time.now > @last_request_at + @resurrect_after

          connection = connections.get_connection(options)
          @counter  += 1

          reload_connections! if @options[:reload_connections] && counter % reload_after == 0
          connection
        end

        # Reloads and replaces the connection collection based on cluster information.
        #
        # A sniffer timeout is logged (when a logger is set) and otherwise ignored,
        # leaving the current connections in place.
        #
        # @return [self]
        # @see    Sniffer#hosts
        #
        def reload_connections!
          hosts = sniffer.hosts
          __rebuild_connections :hosts => hosts, :options => options
          self
        rescue SnifferTimeoutError
          logger.error "[SnifferTimeoutError] Timeout when reloading connections." if logger
          self
        end

        # Tries to "resurrect" all eligible dead connections.
        #
        # @see Connections::Connection#resurrect!
        #
        def resurrect_dead_connections!
          connections.dead.each { |c| c.resurrect! }
        end

        # Replaces the connections collection.
        #
        # @api private
        #
        def __rebuild_connections(arguments={})
          @hosts       = arguments[:hosts]   || []
          @options     = arguments[:options] || {}
          @connections = __build_connections
        end

        # Log request and response information.
        #
        # @api private
        #
        def __log(method, path, params, body, url, response, json, took, duration)
          logger.info  "#{method.to_s.upcase} #{url} " +
                       "[status:#{response.status}, request:#{sprintf('%.3fs', duration)}, query:#{took}]"
          logger.debug "> #{__convert_to_json(body)}" if body
          logger.debug "< #{response.body}"
        end

        # Log failed request.
        #
        # @api private
        def __log_failed(response)
          logger.fatal "[#{response.status}] #{response.body}"
        end

        # Trace the request in the `curl` format.
        #
        # The traced URL always points to `http://localhost:9200`, regardless of the
        # host the request was actually sent to; the response is printed as comments.
        #
        # @api private
        def __trace(method, path, params, body, url, response, json, took, duration)
          trace_url  = "http://localhost:9200/#{path}?pretty" +
                       ( params.empty? ? '' : "&#{::Faraday::Utils::ParamsHash[params].to_query}" )
          trace_body = body ? " -d '#{__convert_to_json(body, :pretty => true)}'" : ''
          tracer.info  "curl -X #{method.to_s.upcase} '#{trace_url}'#{trace_body}\n"
          tracer.debug "# #{Time.now.iso8601} [#{response.status}] (#{format('%.3f', duration)}s)\n#"
          tracer.debug json ? serializer.dump(json, :pretty => true).gsub(/^/, '# ').sub(/\}$/, "\n# }")+"\n" : "# #{response.body}\n"
        end

        # Raise error specific for the HTTP response status or a generic server error
        #
        # @api private
        def __raise_transport_error(response)
          error = ERRORS[response.status] || ServerError
          raise error.new "[#{response.status}] #{response.body}"
        end

        # Converts any non-String object to JSON
        #
        # @return [String] The object itself when it already is a String, the serializer output otherwise
        #
        # @api private
        def __convert_to_json(o=nil, options={})
          o.is_a?(String) ? o : serializer.dump(o, options)
        end

        # Performs a request to Elasticsearch, while handling logging, tracing, marking dead connections,
        # retrying the request and reloading the connections.
        #
        # @abstract The transport implementation has to implement this method either in full,
        #           or by invoking this method with a block. See {HTTP::Faraday#perform_request} for an example.
        #
        # @param method [String] Request method
        # @param path   [String] The API endpoint
        # @param params [Hash]   Request parameters (will be serialized by {Connections::Connection#full_url})
        # @param body   [Hash]   Request body (will be serialized by the {#serializer})
        # @param block  [Proc]   Code block to evaluate, passed from the implementation
        #
        # @return [Response]
        # @raise  [NoMethodError] If no block is passed
        # @raise  [ServerError]   If request failed on server
        # @raise  [Error]         If no connection is available
        #
        def perform_request(method, path, params={}, body=nil, &block)
          raise NoMethodError, "Implement this method in your transport class" unless block_given?
          start = Time.now if logger || tracer
          tries = 0

          begin
            tries     += 1
            connection = get_connection or raise Error.new("Cannot get new connection from pool.")
            url        = connection.full_url(path, params)
            response   = block.call(connection, url)

            connection.healthy! if connection.failures > 0

          rescue *host_unreachable_exceptions => e
            # `connection` is nil when the error was raised before a connection was obtained.
            host_info = connection.host.inspect if connection
            logger.error "[#{e.class}] #{e.message} #{host_info}" if logger

            connection.dead! if connection

            if @options[:reload_on_failure] and tries < connections.all.size
              logger.warn "[#{e.class}] Reloading connections (attempt #{tries} of #{connections.size})" if logger
              reload_connections! and retry
            end

            if @options[:retry_on_failure]
              logger.warn "[#{e.class}] Attempt #{tries} connecting to #{host_info}" if logger
              if tries < max_tries
                retry
              else
                logger.fatal "[#{e.class}] Cannot connect to #{host_info} after #{tries} tries" if logger
                raise e
              end
            else
              raise e
            end

          rescue Exception => e
            # Everything is re-raised; the rescue only exists to log the failure.
            # Guard against a nil `connection` so the original error is not masked.
            host_info = connection.host.inspect if connection
            logger.fatal "[#{e.class}] #{e.message} (#{host_info})" if logger
            raise e
          end

          json = serializer.load(response.body) if response.body.to_s =~ /^\{/

          if logger || tracer
            took = begin
              json['took'] ? sprintf('%.3fs', json['took']/1000.0) : 'n/a'
            rescue StandardError
              'n/a' # no parsed body, or a `took` value which is not a number
            end
            duration = Time.now-start
          end

          __log   method, path, params, body, url, response, json, took, duration if logger
          __trace method, path, params, body, url, response, json, took, duration if tracer

          if response.status.to_i >= 300
            __log_failed response if logger
            __raise_transport_error response
          else
            Response.new response.status, json || response.body, response.headers
          end
        ensure
          @last_request_at = Time.now
        end

        # @abstract Returns an Array of connection errors specific to the transport implementation.
        #           See {HTTP::Faraday#host_unreachable_exceptions} for an example.
        #
        # @return [Array]
        #
        def host_unreachable_exceptions
          [Errno::ECONNREFUSED]
        end

        # @abstract A transport implementation must implement this method.
        #           See {HTTP::Faraday#__build_connections} for an example.
        #
        # @return [Connections::Collection]
        # @api    private
        def __build_connections
          raise NoMethodError, "Implement this method in your class"
        end
      end
    end
  end
end