analytics-ruby 2.2.3.pre → 2.2.4.pre
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- checksums.yaml +5 -5
- data/History.md +16 -0
- data/Makefile +17 -8
- data/README.md +2 -2
- data/RELEASING.md +2 -3
- data/Rakefile +17 -1
- data/analytics-ruby.gemspec +10 -2
- data/codecov.yml +2 -0
- data/lib/analytics-ruby.rb +1 -0
- data/lib/segment/analytics.rb +9 -2
- data/lib/segment/analytics/backoff_policy.rb +49 -0
- data/lib/segment/analytics/client.rb +148 -99
- data/lib/segment/analytics/defaults.rb +20 -4
- data/lib/segment/analytics/logging.rb +2 -4
- data/lib/segment/analytics/message.rb +26 -0
- data/lib/segment/analytics/message_batch.rb +58 -0
- data/lib/segment/analytics/request.rb +84 -32
- data/lib/segment/analytics/response.rb +0 -1
- data/lib/segment/analytics/utils.rb +19 -16
- data/lib/segment/analytics/version.rb +1 -1
- data/lib/segment/analytics/worker.rb +11 -10
- data/spec/helpers/runscope_client.rb +38 -0
- data/spec/segment/analytics/backoff_policy_spec.rb +92 -0
- data/spec/segment/analytics/client_spec.rb +61 -44
- data/spec/segment/analytics/e2e_spec.rb +48 -0
- data/spec/segment/analytics/message_batch_spec.rb +49 -0
- data/spec/segment/analytics/message_spec.rb +35 -0
- data/spec/segment/analytics/request_spec.rb +87 -34
- data/spec/segment/analytics/worker_spec.rb +24 -16
- data/spec/spec_helper.rb +32 -6
- metadata +73 -17
- data/Gemfile.lock +0 -43
- data/analytics-ruby-2.0.13.gem +0 -0
- data/analytics-ruby-2.1.0.gem +0 -0
- data/analytics-ruby-2.2.2.gem +0 -0
--- a/data/lib/segment/analytics/defaults.rb
+++ b/data/lib/segment/analytics/defaults.rb
@@ -6,15 +6,31 @@ module Segment
         PORT = 443
         PATH = '/v1/import'
         SSL = true
-        HEADERS = {
-
-
+        HEADERS = { 'Accept' => 'application/json',
+                    'Content-Type' => 'application/json',
+                    'User-Agent' => "analytics-ruby/#{Analytics::VERSION}" }
+        RETRIES = 10
       end
 
       module Queue
-        BATCH_SIZE = 100
         MAX_SIZE = 10000
       end
+
+      module Message
+        MAX_BYTES = 32768 # 32Kb
+      end
+
+      module MessageBatch
+        MAX_BYTES = 512_000 # 500Kb
+        MAX_SIZE = 100
+      end
+
+      module BackoffPolicy
+        MIN_TIMEOUT_MS = 100
+        MAX_TIMEOUT_MS = 10000
+        MULTIPLIER = 1.5
+        RANDOMIZATION_FACTOR = 0.5
+      end
     end
   end
 end
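Note: the new `Defaults::BackoffPolicy` constants parameterize the retry sleeps used by `Request` below. `backoff_policy.rb` itself (+49 lines) is not expanded in this diff, so the following is only a sketch of what the constant names suggest, assuming a conventional capped, randomized exponential scheme:

    # Sketch only: approximates the interval growth implied by the constants
    # above; the shipped implementation is in backoff_policy.rb (not shown here).
    MIN_TIMEOUT_MS = 100
    MAX_TIMEOUT_MS = 10_000
    MULTIPLIER = 1.5
    RANDOMIZATION_FACTOR = 0.5

    def next_interval(attempt)
      base = [MIN_TIMEOUT_MS * (MULTIPLIER**attempt), MAX_TIMEOUT_MS].min
      jitter = 1 + RANDOMIZATION_FACTOR * (2 * rand - 1) # scale by 0.5..1.5
      (base * jitter).round # milliseconds
    end

    (0..5).map { |n| [MIN_TIMEOUT_MS * (MULTIPLIER**n), MAX_TIMEOUT_MS].min }
    # => [100.0, 150.0, 225.0, 337.5, 506.25, 759.375] (ms, before jitter)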
--- /dev/null
+++ b/data/lib/segment/analytics/message.rb
@@ -0,0 +1,26 @@
+require 'segment/analytics/defaults'
+
+module Segment
+  class Analytics
+    # Represents a message to be sent to the API
+    class Message
+      def initialize(hash)
+        @hash = hash
+      end
+
+      def too_big?
+        json_size > Defaults::Message::MAX_BYTES
+      end
+
+      def json_size
+        to_json.bytesize
+      end
+
+      # Since the hash is expected to not be modified (set at initialization),
+      # the JSON version can be cached after the first computation.
+      def to_json(*args)
+        @json ||= @hash.to_json(*args)
+      end
+    end
+  end
+end
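`Message` (new file) gives the worker a size check without re-serializing: the JSON string is memoized on first use. A minimal usage sketch of the API added above:

    require 'segment/analytics'

    message = Segment::Analytics::Message.new(:event => 'Signed Up', :userId => '123')
    message.json_size # bytesize of the cached JSON
    message.too_big?  # true only past Defaults::Message::MAX_BYTES (32KB)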
--- /dev/null
+++ b/data/lib/segment/analytics/message_batch.rb
@@ -0,0 +1,58 @@
+require 'segment/analytics/logging'
+
+module Segment
+  class Analytics
+    # A batch of `Message`s to be sent to the API
+    class MessageBatch
+      extend Forwardable
+      include Segment::Analytics::Logging
+      include Segment::Analytics::Defaults::MessageBatch
+
+      def initialize(max_message_count)
+        @messages = []
+        @max_message_count = max_message_count
+        @json_size = 0
+      end
+
+      def <<(message)
+        if message.too_big?
+          logger.error('a message exceeded the maximum allowed size')
+        else
+          @messages << message
+          @json_size += message.json_size + 1 # One byte for the comma
+        end
+      end
+
+      def full?
+        item_count_exhausted? || size_exhausted?
+      end
+
+      def clear
+        @messages.clear
+        @json_size = 0
+      end
+
+      def_delegators :@messages, :to_json
+      def_delegators :@messages, :empty?
+      def_delegators :@messages, :length
+
+      private
+
+      def item_count_exhausted?
+        @messages.length >= @max_message_count
+      end
+
+      # We consider the max size here as just enough to leave room for one more
+      # message of the largest size possible. This is a shortcut that allows us
+      # to use a native Ruby `Queue` that doesn't allow peeking. The tradeoff
+      # here is that we might fit in less messages than possible into a batch.
+      #
+      # The alternative is to use our own `Queue` implementation that allows
+      # peeking, and to consider the next message size when calculating whether
+      # the message can be accomodated in this batch.
+      def size_exhausted?
+        @json_size >= (MAX_BYTES - Defaults::Message::MAX_BYTES)
+      end
+    end
+  end
+end
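With the defaults above, a batch reports `full?` once it holds 100 messages or once its accumulated JSON reaches 512,000 - 32,768 = 479,232 bytes, whichever comes first; the headroom is exactly one worst-case message, for the Queue-peeking reason given in the comment. A short sketch of the accounting:

    require 'segment/analytics'

    batch = Segment::Analytics::MessageBatch.new(
      Segment::Analytics::Defaults::MessageBatch::MAX_SIZE # 100
    )
    batch << Segment::Analytics::Message.new(:event => 'Played Song')
    batch.length # 1; the byte counter grew by message.json_size + 1 (the comma)
    batch.full?  # false until 100 messages or 479,232 bytes accumulate
    batch.clear  # resets both the message list and the byte counter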
--- a/data/lib/segment/analytics/request.rb
+++ b/data/lib/segment/analytics/request.rb
@@ -2,6 +2,7 @@ require 'segment/analytics/defaults'
 require 'segment/analytics/utils'
 require 'segment/analytics/response'
 require 'segment/analytics/logging'
+require 'segment/analytics/backoff_policy'
 require 'net/http'
 require 'net/https'
 require 'json'
@@ -19,10 +20,11 @@ module Segment
         options[:host] ||= HOST
         options[:port] ||= PORT
         options[:ssl] ||= SSL
-        options[:headers]
+        @headers = options[:headers] || HEADERS
         @path = options[:path] || PATH
         @retries = options[:retries] || RETRIES
-        @
+        @backoff_policy =
+          options[:backoff_policy] || Segment::Analytics::BackoffPolicy.new
 
         http = Net::HTTP.new(options[:host], options[:port])
         http.use_ssl = options[:ssl]
@@ -36,42 +38,92 @@ module Segment
       #
       # returns - Response of the status and error if it exists
       def post(write_key, batch)
-
-
-
-
+        last_response, exception = retry_with_backoff(@retries) do
+          status_code, body = send_request(write_key, batch)
+          error = JSON.parse(body)['error']
+          should_retry = should_retry_request?(status_code, body)
+
+          [Response.new(status_code, error), should_retry]
+        end
+
+        if exception
+          logger.error(exception.message)
+          exception.backtrace.each { |line| logger.error(line) }
+          Response.new(-1, "Connection error: #{exception}")
+        else
+          last_response
+        end
+      end
+
+      private
+
+      def should_retry_request?(status_code, body)
+        if status_code >= 500
+          true # Server error
+        elsif status_code == 429
+          true # Rate limited
+        elsif status_code >= 400
+          logger.error(body)
+          false # Client error. Do not retry, but log
+        else
+          false
+        end
+      end
+
+      # Takes a block that returns [result, should_retry].
+      #
+      # Retries upto `retries_remaining` times, if `should_retry` is false or
+      # an exception is raised. `@backoff_policy` is used to determine the
+      # duration to sleep between attempts
+      #
+      # Returns [last_result, raised_exception]
+      def retry_with_backoff(retries_remaining, &block)
+        result, caught_exception = nil
+        should_retry = false
+
         begin
-
-
-
-
-
-
-
-
-
-
-
-
-          error = body["error"]
-        end
-      rescue Exception => e
-        unless (remaining_retries -=1).zero?
-          sleep(backoff)
-          retry
-        end
-
-        logger.error e.message
-        e.backtrace.each { |line| logger.error line }
-        status = -1
-        error = "Connection error: #{e}"
+          result, should_retry = yield
+          return [result, nil] unless should_retry
+        rescue StandardError => e
+          should_retry = true
+          caught_exception = e
+        end
+
+        if should_retry && (retries_remaining > 1)
+          sleep(@backoff_policy.next_interval.to_f / 1000)
+          retry_with_backoff(retries_remaining - 1, &block)
+        else
+          [result, caught_exception]
         end
+      end
+
+      # Sends a request for the batch, returns [status_code, body]
+      def send_request(write_key, batch)
+        payload = JSON.generate(
+          :sentAt => datetime_in_iso8601(Time.now),
+          :batch => batch
+        )
+        request = Net::HTTP::Post.new(@path, @headers)
+        request.basic_auth(write_key, nil)
+
+        if self.class.stub
+          logger.debug "stubbed request to #{@path}: " \
+            "write key = #{write_key}, batch = JSON.generate(#{batch})"
 
-
+          [200, '{}']
+        else
+          # If `start` is not called, Ruby adds a 'Connection: close' header to
+          # all requests, preventing us from reusing a connection for multiple
+          # HTTP requests
+          @http.start unless @http.started?
+
+          response = @http.request(request, payload)
+          [response.code.to_i, response.body]
+        end
       end
 
       class << self
-
+        attr_writer :stub
 
         def stub
           @stub || ENV['STUB']
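The rewritten `post` pushes all retry decisions into `retry_with_backoff`: a raised `StandardError` always counts as retryable, while HTTP results are classified by `should_retry_request?` (5xx and 429 retry, other 4xx log and stop). A stand-in block, in place of a real HTTP call, illustrates the contract; `attempts` is hypothetical:

    # Inside Request: fails twice with a transport error, then succeeds,
    # consuming three of the configured attempts and sleeping per the
    # backoff policy between tries.
    attempts = 0
    result, exception = retry_with_backoff(10) do
      attempts += 1
      raise 'connection reset' if attempts < 3 # exception marks the try retryable
      [Response.new(200, nil), false]          # block returns [result, should_retry]
    end
    # attempts == 3, exception is nil, result.status == 200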
--- a/data/lib/segment/analytics/utils.rb
+++ b/data/lib/segment/analytics/utils.rb
@@ -8,7 +8,9 @@ module Segment
       # public: Return a new hash with keys converted from strings to symbols
       #
       def symbolize_keys(hash)
-        hash.
+        hash.each_with_object({}) do |(k, v), memo|
+          memo[k.to_sym] = v
+        end
       end
 
       # public: Convert hash keys from strings to symbols in place
@@ -20,17 +22,18 @@ module Segment
       # public: Return a new hash with keys as strings
       #
       def stringify_keys(hash)
-        hash.
+        hash.each_with_object({}) do |(k, v), memo|
+          memo[k.to_s] = v
+        end
       end
 
       # public: Returns a new hash with all the date values in the into iso8601
       # strings
       #
       def isoify_dates(hash)
-        hash.
+        hash.each_with_object({}) do |(k, v), memo|
           memo[k] = datetime_in_iso8601(v)
-
-        }
+        end
       end
 
       # public: Converts all the date values in the into iso8601 strings in place
@@ -42,18 +45,18 @@ module Segment
       # public: Returns a uid string
       #
       def uid
-        arr = SecureRandom.random_bytes(16).unpack(
+        arr = SecureRandom.random_bytes(16).unpack('NnnnnN')
         arr[2] = (arr[2] & 0x0fff) | 0x4000
         arr[3] = (arr[3] & 0x3fff) | 0x8000
-
+        '%08x-%04x-%04x-%04x-%04x%08x' % arr
       end
 
-      def datetime_in_iso8601
+      def datetime_in_iso8601(datetime)
         case datetime
         when Time
-
+          time_in_iso8601 datetime
         when DateTime
-
+          time_in_iso8601 datetime.to_time
         when Date
           date_in_iso8601 datetime
         else
@@ -61,19 +64,19 @@ module Segment
         end
       end
 
-      def time_in_iso8601
+      def time_in_iso8601(time, fraction_digits = 3)
         fraction = if fraction_digits > 0
-          (
+          ('.%06i' % time.usec)[0, fraction_digits + 1]
         end
 
-        "#{time.strftime(
+        "#{time.strftime('%Y-%m-%dT%H:%M:%S')}#{fraction}#{formatted_offset(time, true, 'Z')}"
       end
 
-      def date_in_iso8601
-        date.strftime(
+      def date_in_iso8601(date)
+        date.strftime('%F')
       end
 
-      def formatted_offset
+      def formatted_offset(time, colon = true, alternate_utc_string = nil)
         time.utc? && alternate_utc_string || seconds_to_utc_offset(time.utc_offset, colon)
       end
 
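The restored signatures pin down the formatting: three fractional digits by default, a literal 'Z' for UTC offsets, and '%F' (YYYY-MM-DD) for bare dates. For example, inside `Utils`:

    time_in_iso8601(Time.utc(2017, 1, 2, 3, 4, 5, 123_456))
    # => "2017-01-02T03:04:05.123Z"  ('.%06i' % 123456 gives ".123456", cut to ".123")

    date_in_iso8601(Date.new(2017, 1, 2))
    # => "2017-01-02"

    uid # => e.g. "9f2e6e40-3f1a-4e8f-9f6d-2b7c9a1d4e5b" (version-4 UUID shape)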
--- a/data/lib/segment/analytics/worker.rb
+++ b/data/lib/segment/analytics/worker.rb
@@ -1,7 +1,8 @@
 require 'segment/analytics/defaults'
-require 'segment/analytics/
-require 'segment/analytics/
+require 'segment/analytics/message'
+require 'segment/analytics/message_batch'
 require 'segment/analytics/request'
+require 'segment/analytics/utils'
 
 module Segment
   class Analytics
@@ -24,10 +25,11 @@ module Segment
         symbolize_keys! options
         @queue = queue
         @write_key = write_key
-        @
-
-        @batch =
+        @on_error = options[:on_error] || proc { |status, error| }
+        batch_size = options[:batch_size] || Defaults::MessageBatch::MAX_SIZE
+        @batch = MessageBatch.new(batch_size)
         @lock = Mutex.new
+        @request = Request.new
       end
 
       # public: Continuously runs the loop to check for new events
@@ -37,14 +39,13 @@ module Segment
         return if @queue.empty?
 
         @lock.synchronize do
-          until @batch.
-            @batch << @queue.pop
+          until @batch.full? || @queue.empty?
+            @batch << Message.new(@queue.pop)
           end
         end
 
-        res =
-
-        @on_error.call res.status, res.error unless res.status == 200
+        res = @request.post(@write_key, @batch)
+        @on_error.call(res.status, res.error) unless res.status == 200
 
         @lock.synchronize { @batch.clear }
       end
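The flush path now wraps each dequeued hash in a `Message`, fills a `MessageBatch` until it is `full?` or the queue drains, posts once per batch through a shared `Request`, and reports any non-200 response through `on_error`. Assuming the constructor shape implied above (`queue, write_key, options`), wiring a callback might look like:

    require 'segment/analytics'

    queue = Queue.new
    queue << { :event => 'Signed Up', :userId => '123' }

    # on_error receives (status, error) whenever a batch does not come back 200
    worker = Segment::Analytics::Worker.new(queue, 'YOUR_WRITE_KEY',
      :on_error => proc { |status, error| warn "segment: #{status} #{error}" })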
--- /dev/null
+++ b/data/spec/helpers/runscope_client.rb
@@ -0,0 +1,38 @@
+require 'faraday'
+require 'pmap'
+
+class RunscopeClient
+  def initialize(api_token)
+    headers = { 'Authorization' => "Bearer #{api_token}" }
+    @conn = Faraday.new('https://api.runscope.com', headers: headers)
+  end
+
+  def requests(bucket_key)
+    with_retries(3) do
+      response = @conn.get("/buckets/#{bucket_key}/messages", count: 20)
+
+      raise "Runscope error. #{response.body}" unless response.status == 200
+
+      message_uuids = JSON.parse(response.body)['data'].map { |message|
+        message.fetch('uuid')
+      }
+
+      message_uuids.pmap { |uuid|
+        response = @conn.get("/buckets/#{bucket_key}/messages/#{uuid}")
+        raise "Runscope error. #{response.body}" unless response.status == 200
+        JSON.parse(response.body).fetch('data').fetch('request')
+      }
+    end
+  end
+
+  private
+
+  def with_retries(max_retries)
+    retries ||= 0
+    yield
+  rescue StandardError => e
+    retries += 1
+    retry if retries < max_retries
+    raise e
+  end
+end
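This spec helper supports the new e2e spec: it lists up to 20 captured messages in a Runscope bucket, fetches the underlying requests in parallel via `pmap`, and retries each Runscope call up to three times. The token and bucket key below are placeholders:

    client = RunscopeClient.new(ENV.fetch('RUNSCOPE_TOKEN'))
    captured = client.requests('example_bucket_key')
    captured.size # number of captured requests, at most 20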