snowplow-tracker 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +56 -37
- data/lib/snowplow-tracker/emitters.rb +355 -147
- data/lib/snowplow-tracker/page.rb +60 -0
- data/lib/snowplow-tracker/payload.rb +30 -34
- data/lib/snowplow-tracker/self_describing_json.rb +92 -9
- data/lib/snowplow-tracker/subject.rb +282 -59
- data/lib/snowplow-tracker/timestamp.rb +95 -23
- data/lib/snowplow-tracker/tracker.rb +547 -242
- data/lib/snowplow-tracker/version.rb +33 -4
- data/lib/snowplow-tracker.rb +5 -5
- metadata +15 -17
- data/lib/snowplow-tracker/contracts.rb +0 -29
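
Of the diffs reproduced below (emitters.rb and page.rb), the emitters.rb one shows the headline API change: `Emitter` and `AsyncEmitter` are now constructed with keyword arguments (`endpoint:` plus an optional `options:` hash), where the removed `super(endpoint, config)` call suggests the 0.6.1 API was positional. A minimal sketch of the new-style construction, based on the `@example` doc comments added in this release (the collector hostname is a placeholder):

```ruby
require 'snowplow-tracker'

# New 0.7.0 keyword-argument style; only the endpoint is required.
emitter = SnowplowTracker::Emitter.new(
  endpoint: 'collector.example.com', # placeholder hostname
  options: { protocol: 'https', port: 443, method: 'post', buffer_size: 5 }
)
```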
```diff
--- data/lib/snowplow-tracker/emitters.rb (0.6.1)
+++ data/lib/snowplow-tracker/emitters.rb (0.7.0)
@@ -1,4 +1,4 @@
-# Copyright (c) 2013-
+# Copyright (c) 2013-2021 Snowplow Analytics Ltd. All rights reserved.
 #
 # This program is licensed to you under the Apache License Version 2.0,
 # and you may not use this file except in compliance with the Apache License Version 2.0.
@@ -9,239 +9,441 @@
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
 
-# Author::
-# Copyright:: Copyright (c) 2013-
+# Author:: Snowplow Analytics Ltd
+# Copyright:: Copyright (c) 2013-2021 Snowplow Analytics Ltd
 # License:: Apache License Version 2.0
 
+
 require 'net/https'
 require 'set'
 require 'logger'
 require 'contracts'
 
 module SnowplowTracker
-
+  # @see Emitter
+  # For logging Emitter activity messages
   LOGGER = Logger.new(STDERR)
   LOGGER.level = Logger::INFO
 
+  # This class sends events to the event collector. All {Tracker}s must have at
+  # least one associated Emitter or the subclass AsyncEmitter.
+  #
+  # The network settings are defined as part of the Emitter initalization. This
+  # table displays the default Emitter settings:
+  #
+  # | Property | Default setting |
+  # | --- | --- |
+  # | Protocol | HTTP |
+  # | Method | GET |
+  # | Buffer size | 1 |
+  # | Path | `/i` |
+  #
+  # The buffer size is 1 because GET requests can only contain one event.
+  #
+  # If you choose to use POST requests, the buffer_size defaults to 10, and the
+  # buffered events are all sent together in a single request. The default path
+  # is '/com.snowplowanalytics.snowplow/tp2' for Emitters using POST.
+  #
+  # # Logging
+  # Emitters log their activity to STDERR by default, using the Ruby standard
+  # library Logger class. A different logger can be configured during Emitter
+  # initialization. For example, to disable logging, you could provide
+  # `Logger.new(IO::NULL)` in the options hash.
+  #
+  # By default, only messages with priority "INFO" or higher will be logged.
+  # This can be changed at any time for the default logger, which is saved as a
+  # module constant (`LOGGER = Logger.new(STDERR)`). If you are not using the
+  # default logger, set the message level before initializing your Emitter.
+  #
+  # @see https://ruby-doc.org/stdlib-2.7.2/libdoc/logger/rdoc/Logger.html Logger documentation
+  #
+  # @example Changing the logger message level.
+  #   require 'logger'
+  #   SnowplowTracker::LOGGER.level = Logger::DEBUG
   class Emitter
-
     include Contracts
 
-
-
-
-
-    :
-    :
-    :
-    :
-
-
-
-
-
-
-
+    # Contract types
+
+    # @private
+    CONFIG_HASH = {
+      path: Maybe[String],
+      protocol: Maybe[Or['http', 'https']],
+      port: Maybe[Num],
+      method: Maybe[Or['get', 'post']],
+      buffer_size: Maybe[Num],
+      on_success: Maybe[Func[Num => Any]],
+      on_failure: Maybe[Func[Num, Hash => Any]],
+      thread_count: Maybe[Num],
+      logger: Maybe[Logger]
+    }
+
+    # @private
+    STRICT_CONFIG_HASH = And[CONFIG_HASH, ->(x) {
+      (x.class == Hash) && Set.new(x.keys).subset?(Set.new(CONFIG_HASH.keys))
+    }]
+
+    # @!group Public constants
+
+    # Default Emitter settings
+    DEFAULT_CONFIG = {
+      protocol: 'http',
+      method: 'get'
     }
 
-
-
-
+    # @!endgroup
+
+    # @private
+    attr_reader :logger
+
+    Contract KeywordArgs[endpoint: String, options: Optional[STRICT_CONFIG_HASH]] => Any
+    # Create a new Emitter instance. The endpoint is required.
+    #
+    # @example Initializing an Emitter with all the possible extra configuration.
+    #   success_callback = ->(success_count) { puts "#{success_count} events sent successfully" }
+    #   failure_callback = ->(success_count, failures) do
+    #     puts "#{success_count} events sent successfully, #{failures.size} sent unsuccessfully"
+    #   end
+    #
+    #   Emitter.new(endpoint: 'collector.example.com',
+    #               options: { path: '/my-pipeline/1',
+    #                          protocol: 'https',
+    #                          port: 443,
+    #                          method: 'post',
+    #                          buffer_size: 5,
+    #                          on_success: success_callback,
+    #                          on_failure: failure_callback,
+    #                          logger: Logger.new(STDOUT) })
+    #
+    # The options hash can have any of these optional parameters:
+    #
+    # | Parameter | Description | Type |
+    # | --- | --- | --- |
+    # | path | Override the default path for appending to the endpoint | String |
+    # | protocol | 'http' or 'https' | String |
+    # | port | The port for the connection | Integer |
+    # | method | 'get' or 'post' | String |
+    # | buffer_size | Number of events to send at once | Integer |
+    # | on_success | A function to call if events were sent successfully | Function |
+    # | on_failure | A function to call if events did not send | Function |
+    # | thread_count | Number of threads to use | Integer |
+    # | logger | Log somewhere other than STDERR | Logger |
+    #
+    # Note that `thread_count` is relevant only to the subclass {AsyncEmitter},
+    # and will be ignored if provided to an Emitter.
+    #
+    # If you choose to use HTTPS, we recommend using port 443.
+    #
+    # @param endpoint [String] the endpoint to send the events to
+    # @param options [Hash] allowed configuration options
+    #
+    # @see AsyncEmitter#initialize
+    # @api public
+    def initialize(endpoint:, options: {})
+      config = DEFAULT_CONFIG.merge(options)
       @lock = Monitor.new
-
+      path = confirm_path(config)
+      @collector_uri = create_collector_uri(endpoint, config[:protocol], config[:port], path)
       @buffer = []
-
-        @buffer_size = config[:buffer_size]
-      elsif config[:method] == 'get'
-        @buffer_size = 1
-      else
-        @buffer_size = 10
-      end
+      @buffer_size = confirm_buffer_size(config)
       @method = config[:method]
       @on_success = config[:on_success]
       @on_failure = config[:on_failure]
-
+      @logger = config[:logger] || LOGGER
+      logger.info("#{self.class} initialized with endpoint #{@collector_uri}")
+    end
+
+    Contract Hash => Num
+    # Creates the `@buffer_size` variable during initialization. Unless
+    # otherwise defined, it's 1 for Emitters using GET and 10 for Emitters using
+    # POST requests.
+    # @private
+    def confirm_buffer_size(config)
+      return config[:buffer_size] unless config[:buffer_size].nil?
+
+      config[:method] == 'get' ? 1 : 10
+    end
+
+    Contract Hash => String
+    # Creates the `@path` variable during initialization. Allows a non-standard
+    # path to be provided.
+    # @private
+    def confirm_path(config)
+      return config[:path] unless config[:path].nil?
 
-
+      config[:method] == 'get' ? '/i' : '/com.snowplowanalytics.snowplow/tp2'
    end
 
    # Build the collector URI from the configuration hash
    #
    Contract String, String, Maybe[Num], String => String
-
-
-
+    # Creates the `@collector_uri` variable during initialization.
+    # The default is "http://{endpoint}/i".
+    # @private
+    def create_collector_uri(endpoint, protocol, port, path)
+      port_string = port.nil? ? '' : ":#{port}"
 
       "#{protocol}://#{endpoint}#{port_string}#{path}"
     end
 
-    # Add an event to the buffer and flush it if maximum size has been reached
-    #
     Contract Hash => nil
+    # Add an event to the buffer and flush it if maximum size has been reached.
+    # This method is not required for standard Ruby tracker usage. A {Tracker}
+    # privately calls this method once the event payload is ready to send.
+    #
+    # We have included it as part of the public API for its possible use in the
+    # `on_failure` callback. This is the optional method, provided in the
+    # `options` Emitter initalization hash, that is called when events fail
+    # to send. You could use {#input} as part of your callback to immediately
+    # retry the failed event.
+    #
+    # @example A possible `on_failure` method using `#input`
+    #   def retry_on_failure(failed_event_count, failed_events)
+    #     # possible backoff-and-retry timeout here
+    #     failed_events.each do |event|
+    #       my_emitter.input(event)
+    #     end
+    #   end
+    #
+    # @api public
     def input(payload)
-      payload.each { |k,v| payload[k] = v.to_s}
+      payload.each { |k, v| payload[k] = v.to_s }
       @lock.synchronize do
         @buffer.push(payload)
-        if @buffer.size >= @buffer_size
-          flush
-        end
+        flush if @buffer.size >= @buffer_size
       end
 
       nil
     end
 
-    # Flush the buffer
-    #
     Contract Bool => nil
-
+    # Flush the Emitter, forcing it to send all the events in its
+    # buffer, even if the buffer is not full. {Emitter} objects, unlike
+    # {AsyncEmitter}s, can only `flush` synchronously. A {Tracker} can manually flush all
+    # its Emitters by calling {Tracker#flush}, part of the public API which
+    # calls this method.
+    #
+    # The unused async parameter here is to avoid ArgumentError, since
+    # {AsyncEmitter#flush} does take an argument.
+    #
+    # @see AsyncEmitter#flush
+    # @private
+    def flush(_async = true)
       @lock.synchronize do
         send_requests(@buffer)
         @buffer = []
       end
+
       nil
     end
 
-    # Send all events in the buffer to the collector
-    #
     Contract ArrayOf[Hash] => nil
-
-
-
+    # Send all events in the buffer to the collector
+    # @private
+    def send_requests(events)
+      if events.empty?
+        logger.info('Skipping sending events since buffer is empty')
         return
       end
-      LOGGER.info("Attempting to send #{evts.size} request#{evts.size == 1 ? '' : 's'}")
 
-
-
+      logger.info("Attempting to send #{events.size} request#{events.size == 1 ? '' : 's'}")
+
+      events.each do |event|
+        # add the sent timestamp, overwrite if already exists
+        event['stm'] = Timestamp.create.to_s
       end
 
       if @method == 'post'
-
-        begin
-          request = http_post(SelfDescribingJson.new(
-            'iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4',
-            evts
-          ).to_json)
-          post_succeeded = is_good_status_code(request.code)
-        rescue StandardError => se
-          LOGGER.warn(se)
-        end
-        if post_succeeded
-          unless @on_success.nil?
-            @on_success.call(evts.size)
-          end
-        else
-          unless @on_failure.nil?
-            @on_failure.call(0, evts)
-          end
-        end
-
+        send_requests_with_post(events)
       elsif @method == 'get'
-
-        unsent_requests = []
-        evts.each do |evt|
-          get_succeeded = false
-          begin
-            request = http_get(evt)
-            get_succeeded = is_good_status_code(request.code)
-          rescue StandardError => se
-            LOGGER.warn(se)
-          end
-          if get_succeeded
-            success_count += 1
-          else
-            unsent_requests << evt
-          end
-        end
-        if unsent_requests.size == 0
-          unless @on_success.nil?
-            @on_success.call(success_count)
-          end
-        else
-          unless @on_failure.nil?
-            @on_failure.call(success_count, unsent_requests)
-          end
-        end
+        send_requests_with_get(events)
       end
 
       nil
     end
 
-
-    #
-
+    Contract ArrayOf[Hash] => nil
+    # Part of {#send_requests}.
+    # @private
+    def send_requests_with_post(events)
+      post_succeeded = false
+      begin
+        request = http_post(SelfDescribingJson.new(
+          'iglu:com.snowplowanalytics.snowplow/payload_data/jsonschema/1-0-4',
+          events
+        ).to_json)
+        post_succeeded = good_status_code?(request.code)
+      rescue StandardError => standard_error
+        logger.warn(standard_error)
+      end
+
+      if post_succeeded
+        @on_success.call(events.size) unless @on_success.nil?
+      else
+        @on_failure.call(0, events) unless @on_failure.nil?
+      end
+
+      nil
+    end
+
+    Contract ArrayOf[Hash] => nil
+    # Part of {#send_requests}.
+    # @private
+    def send_requests_with_get(events)
+      success_count = 0
+      unsent_requests = []
+
+      events.each do |event|
+        request = process_get_event(event)
+        request ? success_count += 1 : unsent_requests << event
+      end
+
+      if unsent_requests.size.zero?
+        @on_success.call(success_count) unless @on_success.nil?
+      else
+        @on_failure.call(success_count, unsent_requests) unless @on_failure.nil?
+      end
+
+      nil
+    end
+
+    Contract Hash => Bool
+    # Part of {#send_requests_with_get}.
+    # @private
+    def process_get_event(event)
+      get_succeeded = false
+      begin
+        request = http_get(event)
+        get_succeeded = good_status_code?(request.code)
+      rescue StandardError => standard_error
+        logger.warn(standard_error)
+      end
+      get_succeeded
+    end
+
+    Contract Hash => ->(x) { x.is_a? Net::HTTPResponse }
+    # Part of {#process_get_event}. This sends a GET request.
+    # @private
     def http_get(payload)
       destination = URI(@collector_uri + '?' + URI.encode_www_form(payload))
-
-
+      logger.info("Sending GET request to #{@collector_uri}...")
+      logger.debug("Payload: #{payload}")
       http = Net::HTTP.new(destination.host, destination.port)
       request = Net::HTTP::Get.new(destination.request_uri)
-      if destination.scheme == 'https'
-        http.use_ssl = true
-      end
+      http.use_ssl = true if destination.scheme == 'https'
       response = http.request(request)
-
+      logger.add(good_status_code?(response.code) ? Logger::INFO : Logger::WARN) do
         "GET request to #{@collector_uri} finished with status code #{response.code}"
-
+      end
 
       response
     end
 
-
-    #
-
+    Contract Hash => ->(x) { x.is_a? Net::HTTPResponse }
+    # Part of {#send_requests_with_post}. This sends a POST request.
+    # @private
     def http_post(payload)
-
-
+      logger.info("Sending POST request to #{@collector_uri}...")
+      logger.debug("Payload: #{payload}")
       destination = URI(@collector_uri)
       http = Net::HTTP.new(destination.host, destination.port)
       request = Net::HTTP::Post.new(destination.request_uri)
-      if destination.scheme == 'https'
-        http.use_ssl = true
-      end
+      http.use_ssl = true if destination.scheme == 'https'
       request.body = payload.to_json
       request.set_content_type('application/json; charset=utf-8')
       response = http.request(request)
-
+      logger.add(good_status_code?(response.code) ? Logger::INFO : Logger::WARN) do
         "POST request to #{@collector_uri} finished with status code #{response.code}"
-
+      end
 
       response
     end
 
-    # Only 2xx and 3xx status codes are considered successes
-    #
     Contract String => Bool
-
+    # Check if the response is good.
+    # Only 2xx and 3xx status codes are considered successes.
+    # @private
+    def good_status_code?(status_code)
       status_code.to_i >= 200 && status_code.to_i < 400
     end
 
-    private :
+    private :create_collector_uri,
             :http_get,
             :http_post
-
   end
 
-
+  # This {Emitter} subclass provides asynchronous event sending. Whenever the
+  # buffer is flushed, the AsyncEmitter places the flushed events in a work
+  # queue. The AsyncEmitter asynchronously sends events in this queue using a
+  # thread pool of a fixed size. The size of the thread pool is 1 by default,
+  # but can be configured as part of the options hash during initialization.
+  #
+  # @see Emitter
+  # @api public
  class AsyncEmitter < Emitter
-
-
-
-
+    Contract KeywordArgs[endpoint: String, options: Optional[STRICT_CONFIG_HASH]] => Any
+    # Create a new AsyncEmitter object. The endpoint is required.
+    #
+    # @example Initializing an AsyncEmitter with all the possible extra configuration.
+    #   success_callback = ->(success_count) { puts "#{success_count} events sent successfully" }
+    #   failure_callback = ->(success_count, failures) do
+    #     puts "#{success_count} events sent successfully, #{failures.size} sent unsuccessfully"
+    #   end
+    #
+    #   Emitter.new(endpoint: 'collector.example.com',
+    #               options: { path: '/my-pipeline/1',
+    #                          protocol: 'https',
+    #                          port: 443,
+    #                          method: 'post',
+    #                          buffer_size: 5,
+    #                          on_success: success_callback,
+    #                          on_failure: failure_callback,
+    #                          logger: Logger.new(STDOUT),
+    #                          thread_count: 5 })
+    #
+    # The options hash can have any of these optional parameters:
+    #
+    # | Parameter | Description | Type |
+    # | --- | --- | --- |
+    # | path | Override the default path for appending to the endpoint | String |
+    # | protocol | 'http' or 'https' | String |
+    # | port | The port for the connection | Integer |
+    # | method | 'get' or 'post' | String |
+    # | buffer_size | Number of events to send at once | Integer |
+    # | on_success | A function to call if events were sent successfully | Function |
+    # | on_failure | A function to call if events did not send | Function |
+    # | thread_count | Number of threads to use | Integer |
+    # | logger | Log somewhere other than STDERR | Logger |
+    #
+    # The `thread_count` determines the number of worker threads which will be
+    # used to send events.
+    #
+    # If you choose to use HTTPS, we recommend using port 443.
+    #
+    # @note if you test the AsyncEmitter by using a short script to send an
+    #   event, you may find that the event fails to send. This is because the
+    #   process exits before the flushing thread is finished. You can get round
+    #   this either by adding a sleep(10) to the end of your script or by using
+    #   the synchronous flush.
+    #
+    # @param endpoint [String] the endpoint to send the events to
+    # @param options [Hash] allowed configuration options
+    #
+    # @see Emitter#initialize
+    # @api public
+    def initialize(endpoint:, options: {})
+      @queue = Queue.new
       # @all_processed_condition and @results_unprocessed are used to emulate Python's Queue.task_done()
       @queue.extend(MonitorMixin)
       @all_processed_condition = @queue.new_cond
       @results_unprocessed = 0
-      (
-
-          consume
-        end
-      end
-      super(endpoint, config)
+      (options[:thread_count] || 1).times { Thread.new { consume } }
+      super(endpoint: endpoint, options: options)
     end
 
+    # AsyncEmitters use the MonitorMixin module, which provides the
+    # `synchronize` and `broadcast` methods.
+    # @private
     def consume
       loop do
         work_unit = @queue.pop
@@ -253,28 +455,34 @@ module SnowplowTracker
       end
     end
 
-    # Flush the buffer
-    #
+    # Flush the Emitter, forcing it to send all the events in its buffer, even
+    # if the buffer is not full.
     #
-
+    # If `async` is true (the default), events are sent even if the queue is not
+    # empty. If `async` is false, it blocks until all queued events have been
+    # sent. Note that this method can be called by public API method
+    # {Tracker#flush}, which has a default of `async` being false.
+    #
+    # @param async [Bool] whether to flush asynchronously or not
+    #
+    # @see Emitter#flush
+    # @private
+    def flush(async = true)
       loop do
         @lock.synchronize do
-          @queue.synchronize
-            @results_unprocessed += 1
-          end
+          @queue.synchronize { @results_unprocessed += 1 }
          @queue << @buffer
          @buffer = []
        end
-
-
+        unless async
+          logger.info('Starting synchronous flush')
          @queue.synchronize do
            @all_processed_condition.wait_while { @results_unprocessed > 0 }
-
+            logger.info('Finished synchronous flush')
          end
        end
-        break if @buffer.
+        break if @buffer.empty?
      end
    end
  end
-
end
```
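
The new `#input` documentation above suggests wiring it into an `on_failure` callback to retry events that failed to send, and the module-level `LOGGER` constant controls how chatty the emitter is. A hedged sketch combining both ideas, assuming placeholder endpoint and retry policy that are not part of the library:

```ruby
require 'logger'
require 'snowplow-tracker'

# Log emitter activity at DEBUG level (payloads included) instead of the default INFO.
SnowplowTracker::LOGGER.level = Logger::DEBUG

emitter = nil # declared first so the callback below can close over it

# on_failure receives the success count and the array of failed event hashes;
# feeding them back through #input re-buffers them for another attempt.
retry_failed = lambda do |success_count, failed_events|
  SnowplowTracker::LOGGER.warn("#{success_count} sent, retrying #{failed_events.size} events")
  failed_events.each { |event| emitter.input(event) }
end

emitter = SnowplowTracker::AsyncEmitter.new(
  endpoint: 'collector.example.com', # placeholder hostname
  options: { method: 'post', thread_count: 3, on_failure: retry_failed }
)
```

Note that this naive callback can retry indefinitely if the collector stays unreachable; the "possible backoff-and-retry timeout here" hint in the doc comment's own example is worth taking seriously in real code.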
```diff
--- /dev/null
+++ data/lib/snowplow-tracker/page.rb (0.7.0)
@@ -0,0 +1,60 @@
+# Copyright (c) 2013-2021 Snowplow Analytics Ltd. All rights reserved.
+#
+# This program is licensed to you under the Apache License Version 2.0,
+# and you may not use this file except in compliance with the Apache License Version 2.0.
+# You may obtain a copy of the Apache License Version 2.0 at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the Apache License Version 2.0 is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
+
+# Author:: Snowplow Analytics Ltd
+# Copyright:: Copyright (c) 2013-2021 Snowplow Analytics Ltd
+# License:: Apache License Version 2.0
+
+
+require 'contracts'
+
+module SnowplowTracker
+  # If the Ruby tracker is incorporated into a website server, the events
+  # tracked will describe user activity on specific webpages. Knowing on which
+  # page an event occurred can be very valuable.
+  #
+  # Add page URL, page title and referrer URL to any event by adding a Page
+  # object to any {Tracker} `#track_x_event` method call.
+  #
+  # Page parameters are saved into the tracked event as part of the 'atomic'
+  # event properties, which have their own column in the eventual events table.
+  # For example, a Page's `page_url` parameter will be sent as `url` in the
+  # raw event payload, ending up in the `page_url` column.
+  #
+  #
+  # @note For {Tracker#track_page_view}, properties set in the Page object will
+  #   override those properties given as arguments.
+  class Page
+    include Contracts
+
+    # @return [Hash] the stored page properties
+    attr_reader :details
+
+    Contract KeywordArgs[page_url: Maybe[String], page_title: Maybe[String], referrer: Maybe[String]] => Any
+    # Create a Page object for attaching page properties to events.
+    #
+    # Page properties will directly populate the event's `page_url`, `page_title` and `referrer` parameters.
+    #
+    # @example Creating a Page
+    #   Page.new(page_url: 'http://www.example.com/second-page',
+    #            page_title: 'Example title',
+    #            referrer: 'http://www.example.com/first-page')
+    #
+    # @param page_url [String] the page URL
+    # @param page_title [String] the title of the page
+    # @param referrer [String] the URL of the previous page
+    def initialize(page_url: nil, page_title: nil, referrer: nil)
+      @details = { 'url' => page_url,
+                   'page' => page_title,
+                   'refr' => referrer }
+    end
+  end
+end
```
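
`Page` is a new class in 0.7.0; its only job is to hold the three page-related 'atomic' properties under their raw protocol keys (`url`, `page`, `refr`). A small sketch of constructing one, taken from the `@example` in the diff above; how the object is then attached to a `#track_x_event` call lives in `tracker.rb`, which is outside this excerpt:

```ruby
require 'snowplow-tracker'

page = SnowplowTracker::Page.new(
  page_url: 'http://www.example.com/second-page',
  page_title: 'Example title',
  referrer: 'http://www.example.com/first-page'
)

# The stored hash uses the raw event field names.
page.details
# => {"url"=>"http://www.example.com/second-page",
#     "page"=>"Example title",
#     "refr"=>"http://www.example.com/first-page"}
```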