snowplow-tracker 0.7.0.pre.alpha.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +41 -16
- data/lib/snowplow-tracker/emitters.rb +339 -154
- data/lib/snowplow-tracker/page.rb +55 -0
- data/lib/snowplow-tracker/payload.rb +28 -39
- data/lib/snowplow-tracker/self_describing_json.rb +92 -9
- data/lib/snowplow-tracker/subject.rb +289 -68
- data/lib/snowplow-tracker/timestamp.rb +93 -25
- data/lib/snowplow-tracker/tracker.rb +521 -257
- data/lib/snowplow-tracker/version.rb +26 -4
- data/lib/snowplow-tracker.rb +5 -5
- metadata +12 -33
- data/lib/snowplow-tracker/contracts.rb +0 -29
data/lib/snowplow-tracker/emitters.rb

```diff
@@ -1,4 +1,4 @@
-# Copyright (c) 2013-
+# Copyright (c) 2013-2021 Snowplow Analytics Ltd. All rights reserved.
 #
 # This program is licensed to you under the Apache License Version 2.0,
 # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -9,239 +9,418 @@
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
 
-# Author::
-# Copyright:: Copyright (c) 2013-
+# Author:: Snowplow Analytics Ltd
+# Copyright:: Copyright (c) 2013-2021 Snowplow Analytics Ltd
 # License:: Apache License Version 2.0
 
+
 require 'net/https'
 require 'set'
 require 'logger'
-require 'contracts'
 
 module SnowplowTracker
-
+  # @see Emitter
+  # For logging Emitter activity messages
   LOGGER = Logger.new(STDERR)
   LOGGER.level = Logger::INFO
 
+  # This class sends events to the event collector. All {Tracker}s must have at
+  # least one associated Emitter or the subclass AsyncEmitter.
+  #
+  # The network settings are defined as part of the Emitter initalization. This
+  # table displays the default Emitter settings:
+  #
+  # | Property | Default setting |
+  # | --- | --- |
+  # | Protocol | HTTP |
+  # | Method | GET |
+  # | Buffer size | 1 |
+  # | Path | `/i` |
+  #
+  # The buffer size is the number of events which will be buffered before they
+  # are all sent simultaneously. The process of sending all buffered events is
+  # called "flushing". The default buffer size is 1 because GET requests can
+  # only contain one event.
+  #
+  # If you choose to use POST requests, the buffer_size defaults to 10, and the
+  # buffered events are all sent together in a single request. The default path
+  # is '/com.snowplowanalytics.snowplow/tp2' for Emitters using POST.
+  #
+  # # Logging
+  # Emitters log their activity to STDERR by default, using the Ruby standard
+  # library Logger class. A different logger can be configured during Emitter
+  # initialization. For example, to disable logging, you could provide
+  # `Logger.new(IO::NULL)` in the options hash.
+  #
+  # By default, only messages with priority "INFO" or higher will be logged.
+  # This can be changed at any time for the default logger, which is saved as a
+  # module constant (`LOGGER = Logger.new(STDERR)`). If you are not using the
+  # default logger, set the message level before initializing your Emitter.
+  #
+  # @see https://ruby-doc.org/stdlib-2.7.2/libdoc/logger/rdoc/Logger.html Logger documentation
+  #
+  # @example Changing the logger message level.
+  #   require 'logger'
+  #   SnowplowTracker::LOGGER.level = Logger::DEBUG
   class Emitter
-
-
-
-
-      :protocol => Maybe[Or['http', 'https']],
-      :port => Maybe[Num],
-      :method => Maybe[Or['get', 'post']],
-      :buffer_size => Maybe[Num],
-      :on_success => Maybe[Func[Num => Any]],
-      :on_failure => Maybe[Func[Num, Hash => Any]],
-      :thread_count => Maybe[Num]
-    })
-
-    @@StrictConfigHash = And[@@ConfigHash, lambda { |x|
-      x.class == Hash and Set.new(x.keys).subset? Set.new(@@ConfigHash.keys)
-    }]
-
-    @@DefaultConfig = {
-      :protocol => 'http',
-      :method => 'get'
+    # Default Emitter settings
+    DEFAULT_CONFIG = {
+      protocol: 'http',
+      method: 'get'
     }
 
-
-
-
+    # @private
+    attr_reader :logger
+
+    # Create a new Emitter instance. The endpoint is required.
+    #
+    # @example Initializing an Emitter with all the possible extra configuration.
+    #   success_callback = ->(success_count) { puts "#{success_count} events sent successfully" }
+    #   failure_callback = ->(success_count, failures) do
+    #     puts "#{success_count} events sent successfully, #{failures.size} sent unsuccessfully"
+    #   end
+    #
+    #   SnowplowTracker::Emitter.new(endpoint: 'collector.example.com',
+    #     options: { path: '/my-pipeline/1',
+    #                protocol: 'https',
+    #                port: 443,
+    #                method: 'post',
+    #                buffer_size: 5,
+    #                on_success: success_callback,
+    #                on_failure: failure_callback,
+    #                logger: Logger.new(STDOUT) })
+    #
+    # The options hash can have any of these optional parameters:
+    #
+    # | Parameter | Description | Type |
+    # | --- | --- | --- |
+    # | path | Override the default path for appending to the endpoint | String |
+    # | protocol | 'http' or 'https' | String |
+    # | port | The port for the connection | Integer |
+    # | method | 'get' or 'post' | String |
+    # | buffer_size | Number of events to send at once | Integer |
+    # | on_success | A method to call if events were sent successfully | Method |
+    # | on_failure | A method to call if events did not send | Method |
+    # | thread_count | Number of threads to use | Integer |
+    # | logger | Log somewhere other than STDERR | Logger |
+    #
+    # Note that `thread_count` is relevant only to the subclass {AsyncEmitter},
+    # and will be ignored if provided to an Emitter.
+    #
+    # If you choose to use HTTPS, we recommend using port 443.
+    #
+    # Only 2xx and 3xx status codes are considered successes.
+    #
+    # The `on_success` callback should accept one argument: the number of
+    # requests sent this way. The `on_failure` callback should accept two
+    # arguments: the number of successfully sent events, and an array containing
+    # the unsuccessful events.
+    #
+    # @param endpoint [String] the endpoint to send the events to
+    # @param options [Hash] allowed configuration options
+    #
+    # @see AsyncEmitter#initialize
+    # @api public
+    def initialize(endpoint:, options: {})
+      config = DEFAULT_CONFIG.merge(options)
       @lock = Monitor.new
-
+      path = confirm_path(config)
+      @collector_uri = create_collector_uri(endpoint, config[:protocol], config[:port], path)
       @buffer = []
-
-        @buffer_size = config[:buffer_size]
-      elsif config[:method] == 'get'
-        @buffer_size = 1
-      else
-        @buffer_size = 10
-      end
+      @buffer_size = confirm_buffer_size(config)
       @method = config[:method]
       @on_success = config[:on_success]
       @on_failure = config[:on_failure]
-
+      @logger = config[:logger] || LOGGER
+      logger.info("#{self.class} initialized with endpoint #{@collector_uri}")
+    end
+
+    # Creates the `@buffer_size` variable during initialization. Unless
+    # otherwise defined, it's 1 for Emitters using GET and 10 for Emitters using
+    # POST requests.
+    # @private
+    def confirm_buffer_size(config)
+      return config[:buffer_size] unless config[:buffer_size].nil?
 
-
+      config[:method] == 'get' ? 1 : 10
     end
 
-    #
-    #
-
-    def
-
-
+    # Creates the `@path` variable during initialization. Allows a non-standard
+    # path to be provided.
+    # @private
+    def confirm_path(config)
+      return config[:path] unless config[:path].nil?
+
+      config[:method] == 'get' ? '/i' : '/com.snowplowanalytics.snowplow/tp2'
+    end
+
+    # Creates the `@collector_uri` variable during initialization.
+    # The default is "http://{endpoint}/i".
+    # @private
+    def create_collector_uri(endpoint, protocol, port, path)
+      port_string = port.nil? ? '' : ":#{port}"
 
       "#{protocol}://#{endpoint}#{port_string}#{path}"
     end
 
-    # Add an event to the buffer and flush it if maximum size has been reached
+    # Add an event to the buffer and flush it if maximum size has been reached.
+    # This method is not required for standard Ruby tracker usage. A {Tracker}
+    # privately calls this method once the event payload is ready to send.
+    #
+    # We have included it as part of the public API for its possible use in the
+    # `on_failure` callback. This is the optional method, provided in the
+    # `options` Emitter initalization hash, that is called when events fail
+    # to send. You could use {#input} as part of your callback to immediately
+    # retry the failed event.
     #
-
+    # The `on_failure` callback should accept two arguments: the number of
+    # successfully sent events, and an array containing the unsuccessful events.
+    #
+    # @example A possible `on_failure` method using `#input`
+    #   def retry_on_failure(failed_event_count, failed_events)
+    #     # possible backoff-and-retry timeout here
+    #     failed_events.each do |event|
+    #       my_emitter.input(event)
+    #     end
+    #   end
+    #
+    # @api public
     def input(payload)
-      payload.each { |k,v| payload[k] = v.to_s}
+      payload.each { |k, v| payload[k] = v.to_s }
       @lock.synchronize do
         @buffer.push(payload)
-        if @buffer.size >= @buffer_size
-          flush
-        end
+        flush if @buffer.size >= @buffer_size
       end
 
       nil
     end
 
-    # Flush the
+    # Flush the Emitter, forcing it to send all the events in its
+    # buffer, even if the buffer is not full. {Emitter} objects, unlike
+    # {AsyncEmitter}s, can only `flush` synchronously. A {Tracker} can manually flush all
+    # its Emitters by calling {Tracker#flush}, part of the public API which
+    # calls this method.
     #
-
-
+    # The unused async parameter here is to avoid ArgumentError, since
+    # {AsyncEmitter#flush} does take an argument.
+    #
+    # @see AsyncEmitter#flush
+    # @private
+    def flush(_async = true)
      @lock.synchronize do
        send_requests(@buffer)
        @buffer = []
      end
+
      nil
    end
 
     # Send all events in the buffer to the collector
-    #
-
-
-
-        LOGGER.info("Skipping sending events since buffer is empty")
+    # @private
+    def send_requests(events)
+      if events.empty?
+        logger.info('Skipping sending events since buffer is empty')
        return
      end
-      LOGGER.info("Attempting to send #{evts.size} request#{evts.size == 1 ? '' : 's'}")
 
-
-
+      logger.info("Attempting to send #{events.size} request#{events.size == 1 ? '' : 's'}")
+
+      events.each do |event|
+        # add the sent timestamp, overwrite if already exists
+        event['stm'] = Timestamp.create.to_s
      end
 
      if @method == 'post'
-
-        begin
-          request = http_post(SelfDescribingJson.new(
-            'iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4',
-            evts
-          ).to_json)
-          post_succeeded = is_good_status_code(request.code)
-        rescue StandardError => se
-          LOGGER.warn(se)
-        end
-        if post_succeeded
-          unless @on_success.nil?
-            @on_success.call(evts.size)
-          end
-        else
-          unless @on_failure.nil?
-            @on_failure.call(0, evts)
-          end
-        end
-
+        send_requests_with_post(events)
      elsif @method == 'get'
-
-        unsent_requests = []
-        evts.each do |evt|
-          get_succeeded = false
-          begin
-            request = http_get(evt)
-            get_succeeded = is_good_status_code(request.code)
-          rescue StandardError => se
-            LOGGER.warn(se)
-          end
-          if get_succeeded
-            success_count += 1
-          else
-            unsent_requests << evt
-          end
-        end
-        if unsent_requests.size == 0
-          unless @on_success.nil?
-            @on_success.call(success_count)
-          end
-        else
-          unless @on_failure.nil?
-            @on_failure.call(success_count, unsent_requests)
-          end
-        end
+        send_requests_with_get(events)
      end
 
      nil
    end
 
-    #
-    #
-
+    # Part of {#send_requests}.
+    # @private
+    def send_requests_with_post(events)
+      post_succeeded = false
+      begin
+        request = http_post(SelfDescribingJson.new(
+          'iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4',
+          events
+        ).to_json)
+        post_succeeded = good_status_code?(request.code)
+      rescue StandardError => standard_error
+        logger.warn(standard_error)
+      end
+
+      if post_succeeded
+        @on_success.call(events.size) unless @on_success.nil?
+      else
+        @on_failure.call(0, events) unless @on_failure.nil?
+      end
+
+      nil
+    end
+
+    # Part of {#send_requests}.
+    # @private
+    def send_requests_with_get(events)
+      success_count = 0
+      unsent_requests = []
+
+      events.each do |event|
+        request = process_get_event(event)
+        request ? success_count += 1 : unsent_requests << event
+      end
+
+      if unsent_requests.size.zero?
+        @on_success.call(success_count) unless @on_success.nil?
+      else
+        @on_failure.call(success_count, unsent_requests) unless @on_failure.nil?
+      end
+
+      nil
+    end
+
+    # Part of {#send_requests_with_get}.
+    # @private
+    def process_get_event(event)
+      get_succeeded = false
+      begin
+        request = http_get(event)
+        get_succeeded = good_status_code?(request.code)
+      rescue StandardError => standard_error
+        logger.warn(standard_error)
+      end
+      get_succeeded
+    end
+
+    # Part of {#process_get_event}. This sends a GET request.
+    # @private
     def http_get(payload)
      destination = URI(@collector_uri + '?' + URI.encode_www_form(payload))
-
-
+      logger.info("Sending GET request to #{@collector_uri}...")
+      logger.debug("Payload: #{payload}")
      http = Net::HTTP.new(destination.host, destination.port)
      request = Net::HTTP::Get.new(destination.request_uri)
-      if destination.scheme == 'https'
-        http.use_ssl = true
-      end
+      http.use_ssl = true if destination.scheme == 'https'
      response = http.request(request)
-
+      logger.add(good_status_code?(response.code) ? Logger::INFO : Logger::WARN) do
        "GET request to #{@collector_uri} finished with status code #{response.code}"
-
+      end
 
      response
    end
 
-    #
-    #
-    Contract Hash => lambda { |x| x.is_a? Net::HTTPResponse }
+    # Part of {#send_requests_with_post}. This sends a POST request.
+    # @private
     def http_post(payload)
-
-
+      logger.info("Sending POST request to #{@collector_uri}...")
+      logger.debug("Payload: #{payload}")
      destination = URI(@collector_uri)
      http = Net::HTTP.new(destination.host, destination.port)
      request = Net::HTTP::Post.new(destination.request_uri)
-      if destination.scheme == 'https'
-        http.use_ssl = true
-      end
+      http.use_ssl = true if destination.scheme == 'https'
      request.body = payload.to_json
      request.set_content_type('application/json; charset=utf-8')
      response = http.request(request)
-
+      logger.add(good_status_code?(response.code) ? Logger::INFO : Logger::WARN) do
        "POST request to #{@collector_uri} finished with status code #{response.code}"
-
+      end
 
      response
    end
 
-    #
-    #
-
-    def
+    # Check if the response is good.
+    # Only 2xx and 3xx status codes are considered successes.
+    # @private
+    def good_status_code?(status_code)
      status_code.to_i >= 200 && status_code.to_i < 400
    end
 
-    private :
+    private :create_collector_uri,
            :http_get,
            :http_post
-
  end
 
-
+  # This {Emitter} subclass provides asynchronous event sending. Whenever the
+  # buffer is flushed, the AsyncEmitter places the flushed events in a work
+  # queue. The AsyncEmitter asynchronously sends events in this queue using a
+  # thread pool of a fixed size. The size of the thread pool is 1 by default,
+  # but can be configured as part of the options hash during initialization.
+  #
+  # @see Emitter
+  # @api public
  class AsyncEmitter < Emitter
-
-
-
-
+    # Create a new AsyncEmitter object. The endpoint is required.
+    #
+    # @example Initializing an AsyncEmitter with all the possible extra configuration.
+    #   success_callback = ->(success_count) { puts "#{success_count} events sent successfully" }
+    #   failure_callback = ->(success_count, failures) do
+    #     puts "#{success_count} events sent successfully, #{failures.size} sent unsuccessfully"
+    #   end
+    #
+    #   SnowplowTracker::Emitter.new(endpoint: 'collector.example.com',
+    #     options: { path: '/my-pipeline/1',
+    #                protocol: 'https',
+    #                port: 443,
+    #                method: 'post',
+    #                buffer_size: 5,
+    #                on_success: success_callback,
+    #                on_failure: failure_callback,
+    #                logger: Logger.new(STDOUT),
+    #                thread_count: 5 })
+    #
+    # The options hash can have any of these optional parameters:
+    #
+    # | Parameter | Description | Type |
+    # | --- | --- | --- |
+    # | path | Override the default path for appending to the endpoint | String |
+    # | protocol | 'http' or 'https' | String |
+    # | port | The port for the connection | Integer |
+    # | method | 'get' or 'post' | String |
+    # | buffer_size | Number of events to send at once | Integer |
+    # | on_success | A function to call if events were sent successfully | Function |
+    # | on_failure | A function to call if events did not send | Function |
+    # | thread_count | Number of threads to use | Integer |
+    # | logger | Log somewhere other than STDERR | Logger |
+    #
+    # The `thread_count` determines the number of worker threads which will be
+    # used to send events.
+    #
+    # If you choose to use HTTPS, we recommend using port 443.
+    #
+    # Only 2xx and 3xx status codes are considered successes.
+    #
+    # The `on_success` callback should accept one argument: the number of
+    # requests sent this way. The `on_failure` callback should accept two
+    # arguments: the number of successfully sent events, and an array containing
+    # the unsuccessful events.
+    #
+    # @note if you test the AsyncEmitter by using a short script to send an
+    #   event, you may find that the event fails to send. This is because the
+    #   process exits before the flushing thread is finished. You can get round
+    #   this either by adding a sleep(10) to the end of your script or by using
+    #   the synchronous flush.
+    #
+    # @param endpoint [String] the endpoint to send the events to
+    # @param options [Hash] allowed configuration options
+    #
+    # @see Emitter#initialize
+    # @api public
+    def initialize(endpoint:, options: {})
+      @queue = Queue.new
      # @all_processed_condition and @results_unprocessed are used to emulate Python's Queue.task_done()
      @queue.extend(MonitorMixin)
      @all_processed_condition = @queue.new_cond
      @results_unprocessed = 0
-      (
-
-          consume
-        end
-      end
-      super(endpoint, config)
+      (options[:thread_count] || 1).times { Thread.new { consume } }
+      super(endpoint: endpoint, options: options)
    end
 
+    # AsyncEmitters use the MonitorMixin module, which provides the
+    # `synchronize` and `broadcast` methods.
+    # @private
    def consume
      loop do
        work_unit = @queue.pop
```
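The hunk above documents the new keyword-argument initializer (`endpoint:` plus an `options:` hash) and suggests using `#input` inside an `on_failure` callback to retry failed events. The sketch below combines those documented pieces; it is illustrative only: the collector hostname is a placeholder, and the `emitter = nil` pre-declaration exists purely so the callback can close over the variable. The remaining emitters.rb hunk, which reworks `AsyncEmitter#flush`, follows the sketch.

```ruby
require 'logger'
require 'snowplow-tracker'

# Show DEBUG-level messages (including request payloads) from the default
# STDERR logger. Set this before creating the Emitter, as advised above.
SnowplowTracker::LOGGER.level = Logger::DEBUG

emitter = nil # pre-declared so the callback below can close over it

# A retry callback in the spirit of the `on_failure`/#input docs above.
# A real callback would add a backoff delay to avoid a tight retry loop.
failure_callback = lambda do |sent_count, failed_events|
  puts "#{sent_count} events sent; retrying #{failed_events.size} failed events"
  failed_events.each { |event| emitter.input(event) } # re-buffer for another attempt
end

# 'collector.example.com' is a placeholder endpoint.
emitter = SnowplowTracker::Emitter.new(
  endpoint: 'collector.example.com',
  options: { protocol: 'https',
             port: 443,
             method: 'post',
             buffer_size: 5,
             on_failure: failure_callback }
)
```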
data/lib/snowplow-tracker/emitters.rb (continued)

```diff
@@ -253,28 +432,34 @@ module SnowplowTracker
       end
     end
 
-    # Flush the buffer
-    #
+    # Flush the Emitter, forcing it to send all the events in its buffer, even
+    # if the buffer is not full.
     #
-
+    # If `async` is true (the default), events are sent even if the queue is not
+    # empty. If `async` is false, it blocks until all queued events have been
+    # sent. Note that this method can be called by public API method
+    # {Tracker#flush}, which has a default of `async` being false.
+    #
+    # @param async [Bool] whether to flush asynchronously or not
+    #
+    # @see Emitter#flush
+    # @private
+    def flush(async = true)
      loop do
        @lock.synchronize do
-          @queue.synchronize
-            @results_unprocessed += 1
-          end
+          @queue.synchronize { @results_unprocessed += 1 }
          @queue << @buffer
          @buffer = []
        end
-
-
+        unless async
+          logger.info('Starting synchronous flush')
          @queue.synchronize do
            @all_processed_condition.wait_while { @results_unprocessed > 0 }
-
+            logger.info('Finished synchronous flush')
          end
        end
-        break if @buffer.
+        break if @buffer.empty?
      end
    end
  end
-
 end
```
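A matching AsyncEmitter sketch, again with a placeholder endpoint. The trailing `sleep` reflects the `@note` above about short scripts exiting before the worker threads have finished sending.

```ruby
require 'snowplow-tracker'

# Three worker threads drain the AsyncEmitter's work queue.
# 'collector.example.com' is a placeholder endpoint.
async_emitter = SnowplowTracker::AsyncEmitter.new(
  endpoint: 'collector.example.com',
  options: { method: 'post',
             buffer_size: 10,
             thread_count: 3 }
)

# ... attach the emitter to a Tracker and track events ...

# In a short script, give the worker threads time to finish before the process
# exits (see the @note above); a synchronous flush is the alternative.
sleep(10)
```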
data/lib/snowplow-tracker/page.rb

```diff
@@ -0,0 +1,55 @@
+# Copyright (c) 2013-2021 Snowplow Analytics Ltd. All rights reserved.
+#
+# This program is licensed to you under the Apache License Version 2.0,
+# and you may not use this file except in compliance with the Apache License Version 2.0.
+# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the Apache License Version 2.0 is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+
+# Author:: Snowplow Analytics Ltd
+# Copyright:: Copyright (c) 2013-2021 Snowplow Analytics Ltd
+# License:: Apache License Version 2.0
+
+
+module SnowplowTracker
+  # If the Ruby tracker is incorporated into a website server, the events
+  # tracked will describe user activity on specific webpages. Knowing on which
+  # page an event occurred can be very valuable.
+  #
+  # Add page URL, page title and referrer URL to any event by adding a Page
+  # object to any {Tracker} `#track_x_event` method call.
+  #
+  # Page parameters are saved into the tracked event as part of the 'atomic'
+  # event properties, which have their own column in the eventual events table.
+  # For example, a Page's `page_url` parameter will be sent as `url` in the
+  # raw event payload, ending up in the `page_url` column.
+  #
+  #
+  # @note For {Tracker#track_page_view}, properties set in the Page object will
+  #   override those properties given as arguments.
+  class Page
+    # @return [Hash] the stored page properties
+    attr_reader :details
+
+    # Create a Page object for attaching page properties to events.
+    #
+    # Page properties will directly populate the event's `page_url`, `page_title` and `referrer` parameters.
+    #
+    # @example Creating a Page
+    #   SnowplowTracker::Page.new(page_url: 'http://www.example.com/second-page',
+    #     page_title: 'Example title',
+    #     referrer: 'http://www.example.com/first-page')
+    #
+    # @param page_url [String] the page URL
+    # @param page_title [String] the title of the page
+    # @param referrer [String] the URL of the previous page
+    def initialize(page_url: nil, page_title: nil, referrer: nil)
+      @details = { 'url' => page_url,
+                   'page' => page_title,
+                   'refr' => referrer }
+    end
+  end
+end
```
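A brief sketch of the new `Page` class in use, based on the `@example` and initializer above. The `#details` reader exposes the hash that will populate the atomic `page_url`, `page_title` and `referrer` fields of whatever event the Page is attached to.

```ruby
require 'snowplow-tracker'

page = SnowplowTracker::Page.new(
  page_url: 'http://www.example.com/second-page',
  page_title: 'Example title',
  referrer: 'http://www.example.com/first-page'
)

page.details
# => { "url"  => "http://www.example.com/second-page",
#      "page" => "Example title",
#      "refr" => "http://www.example.com/first-page" }
```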