snowplow-tracker 0.7.0.pre.alpha.2 → 0.7.0
- checksums.yaml +4 -4
- data/README.md +35 -13
- data/lib/snowplow-tracker/emitters.rb +355 -147
- data/lib/snowplow-tracker/page.rb +60 -0
- data/lib/snowplow-tracker/payload.rb +30 -34
- data/lib/snowplow-tracker/self_describing_json.rb +92 -9
- data/lib/snowplow-tracker/subject.rb +282 -59
- data/lib/snowplow-tracker/timestamp.rb +95 -23
- data/lib/snowplow-tracker/tracker.rb +547 -242
- data/lib/snowplow-tracker/version.rb +33 -4
- data/lib/snowplow-tracker.rb +5 -5
- metadata +15 -16
- data/lib/snowplow-tracker/contracts.rb +0 -29
data/lib/snowplow-tracker/emitters.rb CHANGED
@@ -1,4 +1,4 @@
-# Copyright (c) 2013-
+# Copyright (c) 2013-2021 Snowplow Analytics Ltd. All rights reserved.
 #
 # This program is licensed to you under the Apache License Version 2.0,
 # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -9,239 +9,441 @@
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
 
-# Author::
-# Copyright:: Copyright (c) 2013-
+# Author:: Snowplow Analytics Ltd
+# Copyright:: Copyright (c) 2013-2021 Snowplow Analytics Ltd
 # License:: Apache License Version 2.0
 
+
 require 'net/https'
 require 'set'
 require 'logger'
 require 'contracts'
 
 module SnowplowTracker
-
+  # @see Emitter
+  # For logging Emitter activity messages
   LOGGER = Logger.new(STDERR)
   LOGGER.level = Logger::INFO
 
+  # This class sends events to the event collector. All {Tracker}s must have at
+  # least one associated Emitter or the subclass AsyncEmitter.
+  #
+  # The network settings are defined as part of the Emitter initalization. This
+  # table displays the default Emitter settings:
+  #
+  # | Property | Default setting |
+  # | --- | --- |
+  # | Protocol | HTTP |
+  # | Method | GET |
+  # | Buffer size | 1 |
+  # | Path | `/i` |
+  #
+  # The buffer size is 1 because GET requests can only contain one event.
+  #
+  # If you choose to use POST requests, the buffer_size defaults to 10, and the
+  # buffered events are all sent together in a single request. The default path
+  # is '/com.snowplowanalytics.snowplow/tp2' for Emitters using POST.
+  #
+  # # Logging
+  # Emitters log their activity to STDERR by default, using the Ruby standard
+  # library Logger class. A different logger can be configured during Emitter
+  # initialization. For example, to disable logging, you could provide
+  # `Logger.new(IO::NULL)` in the options hash.
+  #
+  # By default, only messages with priority "INFO" or higher will be logged.
+  # This can be changed at any time for the default logger, which is saved as a
+  # module constant (`LOGGER = Logger.new(STDERR)`). If you are not using the
+  # default logger, set the message level before initializing your Emitter.
+  #
+  # @see https://ruby-doc.org/stdlib-2.7.2/libdoc/logger/rdoc/Logger.html Logger documentation
+  #
+  # @example Changing the logger message level.
+  #   require 'logger'
+  #   SnowplowTracker::LOGGER.level = Logger::DEBUG
   class Emitter
-
     include Contracts
 
-
-
-
-
-    :
-    :
-    :
-    :
-
-
-
-
-
-
-
-
-
+    # Contract types
+
+    # @private
+    CONFIG_HASH = {
+      path: Maybe[String],
+      protocol: Maybe[Or['http', 'https']],
+      port: Maybe[Num],
+      method: Maybe[Or['get', 'post']],
+      buffer_size: Maybe[Num],
+      on_success: Maybe[Func[Num => Any]],
+      on_failure: Maybe[Func[Num, Hash => Any]],
+      thread_count: Maybe[Num],
+      logger: Maybe[Logger]
+    }
+
+    # @private
+    STRICT_CONFIG_HASH = And[CONFIG_HASH, ->(x) {
+      (x.class == Hash) && Set.new(x.keys).subset?(Set.new(CONFIG_HASH.keys))
+    }]
+
+    # @!group Public constants
+
+    # Default Emitter settings
+    DEFAULT_CONFIG = {
+      protocol: 'http',
+      method: 'get'
     }
 
-
-
-
+    # @!endgroup
+
+    # @private
+    attr_reader :logger
+
+    Contract KeywordArgs[endpoint: String, options: Optional[STRICT_CONFIG_HASH]] => Any
+    # Create a new Emitter instance. The endpoint is required.
+    #
+    # @example Initializing an Emitter with all the possible extra configuration.
+    #   success_callback = ->(success_count) { puts "#{success_count} events sent successfully" }
+    #   failure_callback = ->(success_count, failures) do
+    #     puts "#{success_count} events sent successfully, #{failures.size} sent unsuccessfully"
+    #   end
+    #
+    #   Emitter.new(endpoint: 'collector.example.com',
+    #               options: { path: '/my-pipeline/1',
+    #                          protocol: 'https',
+    #                          port: 443,
+    #                          method: 'post',
+    #                          buffer_size: 5,
+    #                          on_success: success_callback,
+    #                          on_failure: failure_callback,
+    #                          logger: Logger.new(STDOUT) })
+    #
+    # The options hash can have any of these optional parameters:
+    #
+    # | Parameter | Description | Type |
+    # | --- | --- | --- |
+    # | path | Override the default path for appending to the endpoint | String |
+    # | protocol | 'http' or 'https' | String |
+    # | port | The port for the connection | Integer |
+    # | method | 'get' or 'post' | String |
+    # | buffer_size | Number of events to send at once | Integer |
+    # | on_success | A function to call if events were sent successfully | Function |
+    # | on_failure | A function to call if events did not send | Function |
+    # | thread_count | Number of threads to use | Integer |
+    # | logger | Log somewhere other than STDERR | Logger |
+    #
+    # Note that `thread_count` is relevant only to the subclass {AsyncEmitter},
+    # and will be ignored if provided to an Emitter.
+    #
+    # If you choose to use HTTPS, we recommend using port 443.
+    #
+    # @param endpoint [String] the endpoint to send the events to
+    # @param options [Hash] allowed configuration options
+    #
+    # @see AsyncEmitter#initialize
+    # @api public
+    def initialize(endpoint:, options: {})
+      config = DEFAULT_CONFIG.merge(options)
       @lock = Monitor.new
-
+      path = confirm_path(config)
+      @collector_uri = create_collector_uri(endpoint, config[:protocol], config[:port], path)
       @buffer = []
-
-        @buffer_size = config[:buffer_size]
-      elsif config[:method] == 'get'
-        @buffer_size = 1
-      else
-        @buffer_size = 10
-      end
+      @buffer_size = confirm_buffer_size(config)
       @method = config[:method]
       @on_success = config[:on_success]
       @on_failure = config[:on_failure]
-
+      @logger = config[:logger] || LOGGER
+      logger.info("#{self.class} initialized with endpoint #{@collector_uri}")
+    end
+
+    Contract Hash => Num
+    # Creates the `@buffer_size` variable during initialization. Unless
+    # otherwise defined, it's 1 for Emitters using GET and 10 for Emitters using
+    # POST requests.
+    # @private
+    def confirm_buffer_size(config)
+      return config[:buffer_size] unless config[:buffer_size].nil?
+
+      config[:method] == 'get' ? 1 : 10
+    end
+
+    Contract Hash => String
+    # Creates the `@path` variable during initialization. Allows a non-standard
+    # path to be provided.
+    # @private
+    def confirm_path(config)
+      return config[:path] unless config[:path].nil?
 
-
+      config[:method] == 'get' ? '/i' : '/com.snowplowanalytics.snowplow/tp2'
     end
 
     # Build the collector URI from the configuration hash
     #
     Contract String, String, Maybe[Num], String => String
-
-
-
+    # Creates the `@collector_uri` variable during initialization.
+    # The default is "http://{endpoint}/i".
+    # @private
+    def create_collector_uri(endpoint, protocol, port, path)
+      port_string = port.nil? ? '' : ":#{port}"
 
       "#{protocol}://#{endpoint}#{port_string}#{path}"
     end
 
-    # Add an event to the buffer and flush it if maximum size has been reached
-    #
     Contract Hash => nil
+    # Add an event to the buffer and flush it if maximum size has been reached.
+    # This method is not required for standard Ruby tracker usage. A {Tracker}
+    # privately calls this method once the event payload is ready to send.
+    #
+    # We have included it as part of the public API for its possible use in the
+    # `on_failure` callback. This is the optional method, provided in the
+    # `options` Emitter initalization hash, that is called when events fail
+    # to send. You could use {#input} as part of your callback to immediately
+    # retry the failed event.
+    #
+    # @example A possible `on_failure` method using `#input`
+    #   def retry_on_failure(failed_event_count, failed_events)
+    #     # possible backoff-and-retry timeout here
+    #     failed_events.each do |event|
+    #       my_emitter.input(event)
+    #     end
+    #   end
+    #
+    # @api public
     def input(payload)
-      payload.each { |k,v| payload[k] = v.to_s}
+      payload.each { |k, v| payload[k] = v.to_s }
       @lock.synchronize do
         @buffer.push(payload)
-        if @buffer.size >= @buffer_size
-          flush
-        end
+        flush if @buffer.size >= @buffer_size
       end
 
       nil
     end
 
-    # Flush the buffer
-    #
     Contract Bool => nil
-
+    # Flush the Emitter, forcing it to send all the events in its
+    # buffer, even if the buffer is not full. {Emitter} objects, unlike
+    # {AsyncEmitter}s, can only `flush` synchronously. A {Tracker} can manually flush all
+    # its Emitters by calling {Tracker#flush}, part of the public API which
+    # calls this method.
+    #
+    # The unused async parameter here is to avoid ArgumentError, since
+    # {AsyncEmitter#flush} does take an argument.
+    #
+    # @see AsyncEmitter#flush
+    # @private
+    def flush(_async = true)
       @lock.synchronize do
         send_requests(@buffer)
         @buffer = []
       end
+
       nil
     end
 
-    # Send all events in the buffer to the collector
-    #
     Contract ArrayOf[Hash] => nil
-
-
-
+    # Send all events in the buffer to the collector
+    # @private
+    def send_requests(events)
+      if events.empty?
+        logger.info('Skipping sending events since buffer is empty')
         return
       end
-      LOGGER.info("Attempting to send #{evts.size} request#{evts.size == 1 ? '' : 's'}")
 
-
-
+      logger.info("Attempting to send #{events.size} request#{events.size == 1 ? '' : 's'}")
+
+      events.each do |event|
+        # add the sent timestamp, overwrite if already exists
+        event['stm'] = Timestamp.create.to_s
       end
 
       if @method == 'post'
-
-        begin
-          request = http_post(SelfDescribingJson.new(
-            'iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4',
-            evts
-          ).to_json)
-          post_succeeded = is_good_status_code(request.code)
-        rescue StandardError => se
-          LOGGER.warn(se)
-        end
-        if post_succeeded
-          unless @on_success.nil?
-            @on_success.call(evts.size)
-          end
-        else
-          unless @on_failure.nil?
-            @on_failure.call(0, evts)
-          end
-        end
-
+        send_requests_with_post(events)
       elsif @method == 'get'
-
-        unsent_requests = []
-        evts.each do |evt|
-          get_succeeded = false
-          begin
-            request = http_get(evt)
-            get_succeeded = is_good_status_code(request.code)
-          rescue StandardError => se
-            LOGGER.warn(se)
-          end
-          if get_succeeded
-            success_count += 1
-          else
-            unsent_requests << evt
-          end
-        end
-        if unsent_requests.size == 0
-          unless @on_success.nil?
-            @on_success.call(success_count)
-          end
-        else
-          unless @on_failure.nil?
-            @on_failure.call(success_count, unsent_requests)
-          end
-        end
+        send_requests_with_get(events)
       end
 
       nil
     end
 
-
-    #
-
+    Contract ArrayOf[Hash] => nil
+    # Part of {#send_requests}.
+    # @private
+    def send_requests_with_post(events)
+      post_succeeded = false
+      begin
+        request = http_post(SelfDescribingJson.new(
+          'iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4',
+          events
+        ).to_json)
+        post_succeeded = good_status_code?(request.code)
+      rescue StandardError => standard_error
+        logger.warn(standard_error)
+      end
+
+      if post_succeeded
+        @on_success.call(events.size) unless @on_success.nil?
+      else
+        @on_failure.call(0, events) unless @on_failure.nil?
+      end
+
+      nil
+    end
+
+    Contract ArrayOf[Hash] => nil
+    # Part of {#send_requests}.
+    # @private
+    def send_requests_with_get(events)
+      success_count = 0
+      unsent_requests = []
+
+      events.each do |event|
+        request = process_get_event(event)
+        request ? success_count += 1 : unsent_requests << event
+      end
+
+      if unsent_requests.size.zero?
+        @on_success.call(success_count) unless @on_success.nil?
+      else
+        @on_failure.call(success_count, unsent_requests) unless @on_failure.nil?
+      end
+
+      nil
+    end
+
+    Contract Hash => Bool
+    # Part of {#send_requests_with_get}.
+    # @private
+    def process_get_event(event)
+      get_succeeded = false
+      begin
+        request = http_get(event)
+        get_succeeded = good_status_code?(request.code)
+      rescue StandardError => standard_error
+        logger.warn(standard_error)
+      end
+      get_succeeded
+    end
+
+    Contract Hash => ->(x) { x.is_a? Net::HTTPResponse }
+    # Part of {#process_get_event}. This sends a GET request.
+    # @private
     def http_get(payload)
       destination = URI(@collector_uri + '?' + URI.encode_www_form(payload))
-
-
+      logger.info("Sending GET request to #{@collector_uri}...")
+      logger.debug("Payload: #{payload}")
       http = Net::HTTP.new(destination.host, destination.port)
       request = Net::HTTP::Get.new(destination.request_uri)
-      if destination.scheme == 'https'
-        http.use_ssl = true
-      end
+      http.use_ssl = true if destination.scheme == 'https'
       response = http.request(request)
-
+      logger.add(good_status_code?(response.code) ? Logger::INFO : Logger::WARN) do
         "GET request to #{@collector_uri} finished with status code #{response.code}"
-
+      end
 
       response
     end
 
-
-    #
-
+    Contract Hash => ->(x) { x.is_a? Net::HTTPResponse }
+    # Part of {#send_requests_with_post}. This sends a POST request.
+    # @private
     def http_post(payload)
-
-
+      logger.info("Sending POST request to #{@collector_uri}...")
+      logger.debug("Payload: #{payload}")
      destination = URI(@collector_uri)
       http = Net::HTTP.new(destination.host, destination.port)
       request = Net::HTTP::Post.new(destination.request_uri)
-      if destination.scheme == 'https'
-        http.use_ssl = true
-      end
+      http.use_ssl = true if destination.scheme == 'https'
       request.body = payload.to_json
       request.set_content_type('application/json; charset=utf-8')
       response = http.request(request)
-
+      logger.add(good_status_code?(response.code) ? Logger::INFO : Logger::WARN) do
         "POST request to #{@collector_uri} finished with status code #{response.code}"
-
+      end
 
       response
     end
 
-    # Only 2xx and 3xx status codes are considered successes
-    #
     Contract String => Bool
-
+    # Check if the response is good.
+    # Only 2xx and 3xx status codes are considered successes.
+    # @private
+    def good_status_code?(status_code)
       status_code.to_i >= 200 && status_code.to_i < 400
     end
 
-    private :
+    private :create_collector_uri,
             :http_get,
             :http_post
-
   end
 
-
+  # This {Emitter} subclass provides asynchronous event sending. Whenever the
+  # buffer is flushed, the AsyncEmitter places the flushed events in a work
+  # queue. The AsyncEmitter asynchronously sends events in this queue using a
+  # thread pool of a fixed size. The size of the thread pool is 1 by default,
+  # but can be configured as part of the options hash during initialization.
+  #
+  # @see Emitter
+  # @api public
   class AsyncEmitter < Emitter
-
-
-
-
+    Contract KeywordArgs[endpoint: String, options: Optional[STRICT_CONFIG_HASH]] => Any
+    # Create a new AsyncEmitter object. The endpoint is required.
+    #
+    # @example Initializing an AsyncEmitter with all the possible extra configuration.
+    #   success_callback = ->(success_count) { puts "#{success_count} events sent successfully" }
+    #   failure_callback = ->(success_count, failures) do
+    #     puts "#{success_count} events sent successfully, #{failures.size} sent unsuccessfully"
+    #   end
+    #
+    #   Emitter.new(endpoint: 'collector.example.com',
+    #               options: { path: '/my-pipeline/1',
+    #                          protocol: 'https',
+    #                          port: 443,
+    #                          method: 'post',
+    #                          buffer_size: 5,
+    #                          on_success: success_callback,
+    #                          on_failure: failure_callback,
+    #                          logger: Logger.new(STDOUT),
+    #                          thread_count: 5 })
+    #
+    # The options hash can have any of these optional parameters:
+    #
+    # | Parameter | Description | Type |
+    # | --- | --- | --- |
+    # | path | Override the default path for appending to the endpoint | String |
+    # | protocol | 'http' or 'https' | String |
+    # | port | The port for the connection | Integer |
+    # | method | 'get' or 'post' | String |
+    # | buffer_size | Number of events to send at once | Integer |
+    # | on_success | A function to call if events were sent successfully | Function |
+    # | on_failure | A function to call if events did not send | Function |
+    # | thread_count | Number of threads to use | Integer |
+    # | logger | Log somewhere other than STDERR | Logger |
+    #
+    # The `thread_count` determines the number of worker threads which will be
+    # used to send events.
+    #
+    # If you choose to use HTTPS, we recommend using port 443.
+    #
+    # @note if you test the AsyncEmitter by using a short script to send an
+    #   event, you may find that the event fails to send. This is because the
+    #   process exits before the flushing thread is finished. You can get round
+    #   this either by adding a sleep(10) to the end of your script or by using
+    #   the synchronous flush.
+    #
+    # @param endpoint [String] the endpoint to send the events to
+    # @param options [Hash] allowed configuration options
+    #
+    # @see Emitter#initialize
+    # @api public
+    def initialize(endpoint:, options: {})
+      @queue = Queue.new
       # @all_processed_condition and @results_unprocessed are used to emulate Python's Queue.task_done()
       @queue.extend(MonitorMixin)
       @all_processed_condition = @queue.new_cond
       @results_unprocessed = 0
-      (
-
-          consume
-        end
-      end
-      super(endpoint, config)
+      (options[:thread_count] || 1).times { Thread.new { consume } }
+      super(endpoint: endpoint, options: options)
     end
 
+    # AsyncEmitters use the MonitorMixin module, which provides the
+    # `synchronize` and `broadcast` methods.
+    # @private
     def consume
       loop do
         work_unit = @queue.pop
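The hunk above replaces the old positional-argument constructor with keyword arguments and documents the `on_failure` callback alongside the now-public `#input` method. The sketch below ties those pieces together; it is a minimal illustration rather than part of the gem: the collector hostname is a placeholder and the retry callback simply follows the approach suggested in the new `#input` documentation.

    require 'snowplow-tracker'

    # Minimal sketch of the 0.7.0 keyword-argument initialization.
    # 'collector.example.com' is a placeholder endpoint.
    emitter = SnowplowTracker::Emitter.new(
      endpoint: 'collector.example.com',
      options: {
        method: 'post',
        buffer_size: 5,
        on_success: ->(count) { puts "#{count} events sent" },
        # on_failure receives the success count and the failed event hashes.
        # Because #input is public, failed events can simply be re-queued; the
        # lambda captures the `emitter` local, which is assigned below and is
        # only read when the callback eventually runs.
        on_failure: ->(_successes, failed_events) {
          failed_events.each { |event| emitter.input(event) }
        }
      }
    )

In normal use a Tracker feeds events into the Emitter; `#input` is exposed mainly so a callback like this one can re-submit failures.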
@@ -253,28 +455,34 @@ module SnowplowTracker
       end
     end
 
-    # Flush the buffer
-    #
+    # Flush the Emitter, forcing it to send all the events in its buffer, even
+    # if the buffer is not full.
     #
-
+    # If `async` is true (the default), events are sent even if the queue is not
+    # empty. If `async` is false, it blocks until all queued events have been
+    # sent. Note that this method can be called by public API method
+    # {Tracker#flush}, which has a default of `async` being false.
+    #
+    # @param async [Bool] whether to flush asynchronously or not
+    #
+    # @see Emitter#flush
+    # @private
+    def flush(async = true)
       loop do
         @lock.synchronize do
-          @queue.synchronize
-            @results_unprocessed += 1
-          end
+          @queue.synchronize { @results_unprocessed += 1 }
           @queue << @buffer
           @buffer = []
         end
-
-
+        unless async
+          logger.info('Starting synchronous flush')
           @queue.synchronize do
             @all_processed_condition.wait_while { @results_unprocessed > 0 }
-
+            logger.info('Finished synchronous flush')
           end
         end
-        break if @buffer.
+        break if @buffer.empty?
       end
     end
   end
-
 end
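The AsyncEmitter documented above drains its work queue on a pool of worker threads, and the @note warns that a short-lived script can exit before those threads finish. Below is a hedged sketch of that workflow, again with a placeholder endpoint; in normal use the synchronous flush is reached through `Tracker#flush`, but calling the emitter's `flush(false)` directly shows the blocking behaviour described in the hunk.

    require 'snowplow-tracker'

    # Sketch only: placeholder endpoint, no real collector behind it.
    emitter = SnowplowTracker::AsyncEmitter.new(
      endpoint: 'collector.example.com',
      options: { method: 'post', buffer_size: 10, thread_count: 3 }
    )

    # ... a Tracker attached to this emitter buffers events here ...

    # A short script can exit before the worker threads drain the queue.
    # Passing async = false blocks until every queued batch is processed,
    # which is the "synchronous flush" the @note recommends.
    emitter.flush(false)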
data/lib/snowplow-tracker/page.rb ADDED
@@ -0,0 +1,60 @@
+# Copyright (c) 2013-2021 Snowplow Analytics Ltd. All rights reserved.
+#
+# This program is licensed to you under the Apache License Version 2.0,
+# and you may not use this file except in compliance with the Apache License Version 2.0.
+# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the Apache License Version 2.0 is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+
+# Author:: Snowplow Analytics Ltd
+# Copyright:: Copyright (c) 2013-2021 Snowplow Analytics Ltd
+# License:: Apache License Version 2.0
+
+
+require 'contracts'
+
+module SnowplowTracker
+  # If the Ruby tracker is incorporated into a website server, the events
+  # tracked will describe user activity on specific webpages. Knowing on which
+  # page an event occurred can be very valuable.
+  #
+  # Add page URL, page title and referrer URL to any event by adding a Page
+  # object to any {Tracker} `#track_x_event` method call.
+  #
+  # Page parameters are saved into the tracked event as part of the 'atomic'
+  # event properties, which have their own column in the eventual events table.
+  # For example, a Page's `page_url` parameter will be sent as `url` in the
+  # raw event payload, ending up in the `page_url` column.
+  #
+  #
+  # @note For {Tracker#track_page_view}, properties set in the Page object will
+  #   override those properties given as arguments.
+  class Page
+    include Contracts
+
+    # @return [Hash] the stored page properties
+    attr_reader :details
+
+    Contract KeywordArgs[page_url: Maybe[String], page_title: Maybe[String], referrer: Maybe[String]] => Any
+    # Create a Page object for attaching page properties to events.
+    #
+    # Page properties will directly populate the event's `page_url`, `page_title` and `referrer` parameters.
+    #
+    # @example Creating a Page
+    #   Page.new(page_url: 'http://www.example.com/second-page',
+    #            page_title: 'Example title',
+    #            referrer: 'http://www.example.com/first-page')
+    #
+    # @param page_url [String] the page URL
+    # @param page_title [String] the title of the page
+    # @param referrer [String] the URL of the previous page
+    def initialize(page_url: nil, page_title: nil, referrer: nil)
+      @details = { 'url' => page_url,
+                   'page' => page_title,
+                   'refr' => referrer }
+    end
+  end
+end
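The new Page class is a thin wrapper around the three atomic page fields. The sketch below, with placeholder URLs, shows how the keyword arguments end up in the `details` hash under the raw payload keys `url`, `page` and `refr`; attaching the object to an event uses the Tracker's `track_x_event` methods mentioned in the class comment, which are outside this diff.

    require 'snowplow-tracker'

    # Placeholder URLs, matching the @example in the class documentation.
    page = SnowplowTracker::Page.new(
      page_url: 'http://www.example.com/second-page',
      page_title: 'Example title',
      referrer: 'http://www.example.com/first-page'
    )

    # The reader exposes the raw payload keys that feed the atomic
    # page_url, page_title and referrer columns.
    page.details
    # => { 'url'  => 'http://www.example.com/second-page',
    #      'page' => 'Example title',
    #      'refr' => 'http://www.example.com/first-page' }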