powertrack 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/Gemfile.lock +1 -1
- data/History.txt +9 -4
- data/README.md +15 -2
- data/TODO.md +1 -0
- data/lib/powertrack/streaming/retrier.rb +1 -1
- data/lib/powertrack/streaming/stream.rb +53 -18
- data/lib/powertrack/version.rb +1 -1
- data/test/minitest_helper.rb +3 -2
- data/test/test_manage_rules.rb +9 -1
- data/test/test_track_stream.rb +42 -12
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NjZjMjc0OWE1ODU4NTUyNTY2Zjk1YzdiMjYxZDgxMjYwMTI0MmQyMg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MmEzNDQ0YWYzZGYwODcwNDM0MDU5MDc1YjdlZTIwMTRjMmRhNzZlYQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NTU5ZmE3MzI3NzUzYTYxYzgyNGZiYzBkYjllODdiMTA5M2YyYWIyN2Y3M2E2
|
10
|
+
M2ZhNDRmZWFlNTE1ZTI4YTA0OGZiNDg4NDFiYWQwOWRiNGMwNjhlMTIzZWRi
|
11
|
+
NGNhNzFiZTIyM2Q3Y2I4OTUzYmQ0ZmRmYjQ4ODRiODhkMjUzMzg=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YzQxZjcxMDVmYTQzZmRlODNjMWIyNmNlNDczNWUyMTQzNzM5MmRlZDk2ZDJh
|
14
|
+
ZDhkNmU3MzRmMTFiZjBlM2RiZjYzZmQ4MzQ1MjI1YThjZGVmYjdjMWZjZTI0
|
15
|
+
OGUxYzYwY2JlY2Q3NDMwOWQ1N2NlMjg1MDI1ZjU2NzU5NGQzOGE=
|
data/Gemfile.lock
CHANGED
data/History.txt
CHANGED
@@ -1,10 +1,15 @@
|
|
1
|
-
1.0
|
2
|
-
|
1
|
+
v1.1.0
|
2
|
+
------
|
3
|
+
|
4
|
+
* Add support for Replay feature
|
5
|
+
|
6
|
+
v1.0.3
|
7
|
+
------
|
3
8
|
|
4
9
|
* Retrieve the on_system function from options
|
5
10
|
|
6
|
-
|
7
|
-
|
11
|
+
v1.0.2
|
12
|
+
------
|
8
13
|
|
9
14
|
* to_json can get options in order for powertrack to support the standard
|
10
15
|
JSON library (through the MultiJson gem). Thanks to @duncanita for the PR.
|
data/README.md
CHANGED
@@ -90,8 +90,8 @@ The ```:stop_timeout``` may be fine-tune when passing options to the tracker.
|
|
90
90
|
|
91
91
|
As highly recommended by GNIP, the PowerTrack::Stream client manages an exponential
|
92
92
|
backoff retry mechanism when a disconnection happens. The reconnections can be
|
93
|
-
fine-tuned through the
|
94
|
-
```track``` call.
|
93
|
+
fine-tuned through the ```:max_retries``` and ```:backoff``` options passed to
|
94
|
+
the ```track``` call.
|
95
95
|
|
96
96
|
## Backfill
|
97
97
|
|
@@ -102,6 +102,19 @@ last 5 minutes when reconnecting.
|
|
102
102
|
Provide a (numerical) client id as the last (but optional) argument of the
|
103
103
|
PowerTrack::Stream constructor to enable this feature.
|
104
104
|
|
105
|
+
## Replay
|
106
|
+
|
107
|
+
Replay is a feature provided by GNIP to recover lost activities over the last
|
108
|
+
5 days. The Replay stream lives alongside the realtime stream and is activated
|
109
|
+
by setting the ```:replay``` option to true when building a ```PowerTrack::Stream```
|
110
|
+
object.
|
111
|
+
|
112
|
+
Once Replay is activated, you use the stream as before, starting by
|
113
|
+
configuring some rules that define which activities you will recover. Once done,
|
114
|
+
you can track the stream by specifying a timeframe with the ```:from```
|
115
|
+
and ```:to``` options. By default, replay happens over 30 minutes, starting 1
|
116
|
+
hour ago.
|
117
|
+
|
105
118
|
## Errors
|
106
119
|
|
107
120
|
All the errors that come from PowerTrack are defined through an ad-hoc exception
|
data/TODO.md
CHANGED
@@ -78,5 +78,6 @@ See [Managing disconnections](http://support.gnip.com/articles/disconnections-ex
|
|
78
78
|
## Other features
|
79
79
|
|
80
80
|
* _[DONE]_ Support test and development streams
|
81
|
+
* _[DONE]_ Support Replay mode (5-days back history)
|
81
82
|
* Support status dashboard
|
82
83
|
* Support Historical Powertrack
|
@@ -18,7 +18,7 @@ module PowerTrack
|
|
18
18
|
include VoidLogger::LoggerMixin
|
19
19
|
|
20
20
|
# The format of the URLs to connect to the various stream services
|
21
|
-
FEATURE_URL_FORMAT = "https://%s:%s/accounts/%s/publishers/%s/
|
21
|
+
FEATURE_URL_FORMAT = "https://%s:%s/accounts/%s/publishers/%s/%s/track/%s%s.json".freeze
|
22
22
|
|
23
23
|
# The default timeout on a connection to PowerTrack. Can be overridden per call.
|
24
24
|
DEFAULT_CONNECTION_TIMEOUT = 30
|
@@ -32,16 +32,22 @@ module PowerTrack
|
|
32
32
|
connect_timeout: DEFAULT_CONNECTION_TIMEOUT,
|
33
33
|
inactivity_timeout: DEFAULT_INACTIVITY_TIMEOUT,
|
34
34
|
# use a client id if you want to leverage the Backfill feature
|
35
|
-
client_id: nil
|
35
|
+
client_id: nil,
|
36
|
+
# enable the replay mode to get activities over the last 5 days
|
37
|
+
# see http://support.gnip.com/apis/replay/api_reference.html
|
38
|
+
replay: false
|
36
39
|
}
|
37
40
|
|
38
41
|
DEFAULT_OK_RESPONSE_STATUS = 200
|
39
42
|
|
40
|
-
#
|
43
|
+
# The patterns used to identify the various types of message received from GNIP
|
41
44
|
# everything else is an activity
|
42
45
|
HEARTBEAT_MESSAGE_PATTERN = /\A\s*\z/
|
43
46
|
SYSTEM_MESSAGE_PATTERN = /\A\s*\{\s*"(info|warn|error)":/mi
|
44
47
|
|
48
|
+
# The format used to send UTC timestamps in Replay mode
|
49
|
+
REPLAY_TIMESTAMP_FORMAT = '%Y%m%d%H%M'
|
50
|
+
|
45
51
|
attr_reader :username, :account_name, :data_source, :label
|
46
52
|
|
47
53
|
def initialize(username, password, account_name, data_source, label, options=nil)
|
@@ -52,6 +58,8 @@ module PowerTrack
|
|
52
58
|
@label = label
|
53
59
|
@options = DEFAULT_STREAM_OPTIONS.merge(options || {})
|
54
60
|
@client_id = @options[:client_id]
|
61
|
+
@replay = !!@options[:replay]
|
62
|
+
@stream_mode = @replay ? 'replay' : 'streams'
|
55
63
|
end
|
56
64
|
|
57
65
|
# Adds many rules to your PowerTrack stream’s ruleset.
|
@@ -105,7 +113,7 @@ module PowerTrack
|
|
105
113
|
# receive GZip-compressed payloads ?
|
106
114
|
compressed: true,
|
107
115
|
# max number of retries after a disconnection
|
108
|
-
max_retries:
|
116
|
+
max_retries: 2,
|
109
117
|
# advanced options to configure exponential backoff used for retries
|
110
118
|
backoff: nil,
|
111
119
|
# max number of seconds to wait for last message handlers to complete
|
@@ -113,6 +121,10 @@ module PowerTrack
|
|
113
121
|
# pass message in raw form (JSON formatted string) instead of JSON-decoded
|
114
122
|
# Ruby objects to message handlers
|
115
123
|
raw: false,
|
124
|
+
# the starting date from which the activities will be recovered (replay mode only)
|
125
|
+
from: nil,
|
126
|
+
# the ending date to which the activities will be recovered (replay mode only)
|
127
|
+
to: nil,
|
116
128
|
# called for each message received, except heartbeats
|
117
129
|
on_message: nil,
|
118
130
|
# called for each activity received
|
@@ -158,6 +170,7 @@ module PowerTrack
|
|
158
170
|
gnip_server_port,
|
159
171
|
@account_name,
|
160
172
|
@data_source,
|
173
|
+
@stream_mode,
|
161
174
|
@label,
|
162
175
|
feature ]
|
163
176
|
|
@@ -187,6 +200,7 @@ module PowerTrack
|
|
187
200
|
# Opens a new connection to GNIP PowerTrack.
|
188
201
|
def connect(hostname, feature=nil)
|
189
202
|
url = feature_url(hostname, feature)
|
203
|
+
logger.debug("Connecting to '#{url}' with headers #{connection_headers}...")
|
190
204
|
EventMachine::HttpRequest.new(url, connection_headers)
|
191
205
|
end
|
192
206
|
|
@@ -283,14 +297,14 @@ module PowerTrack
|
|
283
297
|
handle_api_response(resp_status, resp_error, resp_body, options[:ok])
|
284
298
|
end
|
285
299
|
|
286
|
-
# Returns the type of message received on the stream,
|
287
|
-
#
|
300
|
+
# Returns the type of message received on the stream, together with a
|
301
|
+
# level indicator in case of a system message, nil otherwise.
|
288
302
|
def message_type(message)
|
289
303
|
case message
|
290
|
-
when HEARTBEAT_MESSAGE_PATTERN then :heartbeat
|
291
|
-
when SYSTEM_MESSAGE_PATTERN then :system
|
304
|
+
when HEARTBEAT_MESSAGE_PATTERN then [ :heartbeat, nil ]
|
305
|
+
when SYSTEM_MESSAGE_PATTERN then [ :system, $1.downcase.to_sym ]
|
292
306
|
else
|
293
|
-
:activity
|
307
|
+
[ :activity, nil ]
|
294
308
|
end
|
295
309
|
end
|
296
310
|
|
@@ -322,7 +336,25 @@ module PowerTrack
|
|
322
336
|
EM.run do
|
323
337
|
logger.info "Starting the reactor..."
|
324
338
|
con = connect('stream')
|
325
|
-
|
339
|
+
get_opts = { head: track_req_headers(options[:compressed]) }
|
340
|
+
|
341
|
+
# add a timeframe in replay mode
|
342
|
+
if @replay
|
343
|
+
now = Time.now
|
344
|
+
# start 1 hour ago by default
|
345
|
+
from = options[:from] || (now - 60*60)
|
346
|
+
# stop 30 minutes ago by default
|
347
|
+
to = options[:to] || (now - 30*60)
|
348
|
+
|
349
|
+
get_opts[:query] = {
|
350
|
+
'fromDate' => from.utc.strftime(REPLAY_TIMESTAMP_FORMAT),
|
351
|
+
'toDate' => to.utc.strftime(REPLAY_TIMESTAMP_FORMAT)
|
352
|
+
}
|
353
|
+
|
354
|
+
logger.info "Replay mode enabled from '#{from}' to '#{to}'"
|
355
|
+
end
|
356
|
+
|
357
|
+
http = con.get(get_opts)
|
326
358
|
|
327
359
|
# polls to see if the connection should be closed
|
328
360
|
close_watcher = EM.add_periodic_timer(1) do
|
@@ -352,19 +384,21 @@ module PowerTrack
|
|
352
384
|
next
|
353
385
|
end
|
354
386
|
|
355
|
-
# reset retries when some (valid) data are received
|
356
|
-
if retrier.retrying?
|
357
|
-
logger.info "Resetting retries..."
|
358
|
-
retrier.reset!
|
359
|
-
end
|
360
|
-
|
361
387
|
# process the chunk
|
362
388
|
buffer.process(chunk) do |raw|
|
363
389
|
logger.debug "New message received"
|
390
|
+
|
391
|
+
# get the message type and its (optional) level
|
392
|
+
m_type, m_level = message_type(raw)
|
393
|
+
|
394
|
+
# reset retries when some (valid) data are received
|
395
|
+
if retrier.retrying? && m_level != :error
|
396
|
+
logger.info "Resetting retries..."
|
397
|
+
retrier.reset!
|
398
|
+
end
|
399
|
+
|
364
400
|
EM.defer do
|
365
401
|
# select the right message handler(s) according to the message type
|
366
|
-
m_type = message_type(raw)
|
367
|
-
|
368
402
|
if m_type == :heartbeat
|
369
403
|
on_heartbeat.call if on_heartbeat
|
370
404
|
else
|
@@ -401,6 +435,7 @@ module PowerTrack
|
|
401
435
|
resp_status = http_client.response_header.status || DEFAULT_OK_RESPONSE_STATUS
|
402
436
|
resp_error = http_client.error
|
403
437
|
resp_body = http_client.response
|
438
|
+
|
404
439
|
wait_til_defers_finish_and_stop(stop_timeout)
|
405
440
|
end
|
406
441
|
end
|
data/lib/powertrack/version.rb
CHANGED
data/test/minitest_helper.rb
CHANGED
@@ -30,12 +30,13 @@ class Minitest::Test
|
|
30
30
|
end
|
31
31
|
|
32
32
|
# Returns a brand-new stream based on the config found in test/powertrack.yml.
|
33
|
-
def new_stream
|
33
|
+
def new_stream(replay=false)
|
34
34
|
PowerTrack::Stream.new(
|
35
35
|
powertrack_config[:username],
|
36
36
|
powertrack_config[:password],
|
37
37
|
powertrack_config[:account_name],
|
38
38
|
powertrack_config[:data_source],
|
39
|
-
powertrack_config[:stream_label]
|
39
|
+
replay ? 'prod' : powertrack_config[:stream_label],
|
40
|
+
replay: replay)
|
40
41
|
end
|
41
42
|
end
|
data/test/test_manage_rules.rb
CHANGED
@@ -5,7 +5,15 @@ require 'multi_json'
|
|
5
5
|
class TestManageRules < Minitest::Test
|
6
6
|
|
7
7
|
def test_add_then_delete_a_single_rule
|
8
|
-
|
8
|
+
add_then_delete_a_single_rule(false)
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_add_then_delete_a_single_rule_in_replay_mode
|
12
|
+
add_then_delete_a_single_rule(true)
|
13
|
+
end
|
14
|
+
|
15
|
+
def add_then_delete_a_single_rule(replay)
|
16
|
+
stream = new_stream(replay)
|
9
17
|
|
10
18
|
rule = PowerTrack::Rule.new('coke')
|
11
19
|
assert rule.valid?
|
data/test/test_track_stream.rb
CHANGED
@@ -4,8 +4,16 @@ require 'multi_json'
|
|
4
4
|
|
5
5
|
class TestTrackStream < Minitest::Test
|
6
6
|
|
7
|
-
def
|
8
|
-
|
7
|
+
def test_track_realtime_stream
|
8
|
+
track_simple_stream(false)
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_track_replay_stream
|
12
|
+
track_simple_stream(true)
|
13
|
+
end
|
14
|
+
|
15
|
+
def track_simple_stream(replay)
|
16
|
+
stream = new_stream(replay)
|
9
17
|
|
10
18
|
# add a logger
|
11
19
|
stream.logger = Logger.new(STDERR)
|
@@ -23,6 +31,8 @@ class TestTrackStream < Minitest::Test
|
|
23
31
|
received = 0
|
24
32
|
tweeted = 0
|
25
33
|
closed = false
|
34
|
+
from = nil
|
35
|
+
to = nil
|
26
36
|
|
27
37
|
# ready to track
|
28
38
|
on_message = lambda do |message|
|
@@ -34,28 +44,48 @@ class TestTrackStream < Minitest::Test
|
|
34
44
|
on_activity = lambda do |tweet|
|
35
45
|
tweeted += 1
|
36
46
|
end
|
47
|
+
on_system = lambda do |message|
|
48
|
+
$stderr.puts message.inspect
|
49
|
+
end
|
37
50
|
|
38
51
|
close_now = lambda { closed }
|
39
52
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
53
|
+
if replay
|
54
|
+
now = Time.now
|
55
|
+
from = now - 31*60
|
56
|
+
to = now - 30*60
|
57
|
+
delay = to - from
|
58
|
+
else
|
59
|
+
delay = 60
|
60
|
+
Thread.new do
|
61
|
+
$stderr.puts "Time-bomb thread running for #{delay} seconds..."
|
62
|
+
sleep delay
|
63
|
+
$stderr.puts "Time to shut down !"
|
64
|
+
closed = true
|
65
|
+
end
|
46
66
|
end
|
47
67
|
|
48
68
|
started_at = Time.now
|
49
69
|
res = stream.track(on_message: on_message,
|
50
70
|
on_heartbeat: on_heartbeat,
|
51
71
|
on_activity: on_activity,
|
72
|
+
on_system: on_system,
|
52
73
|
close_now: close_now,
|
53
|
-
max_retries:
|
54
|
-
fake_disconnections: 20
|
74
|
+
max_retries: replay ? 0 : 2,
|
75
|
+
fake_disconnections: replay ? nil : 20,
|
76
|
+
from: from,
|
77
|
+
to: to)
|
78
|
+
|
79
|
+
ended_at = Time.now
|
55
80
|
|
56
81
|
assert_nil res
|
57
|
-
assert closed, 'Stream not closed'
|
58
|
-
|
82
|
+
assert replay || closed, 'Stream not closed'
|
83
|
+
|
84
|
+
if replay
|
85
|
+
assert (ended_at - started_at) <= delay
|
86
|
+
else
|
87
|
+
assert (ended_at - started_at) >= delay
|
88
|
+
end
|
59
89
|
|
60
90
|
assert heartbeats > 0, 'No heartbeat received'
|
61
91
|
puts "#{heartbeats} heartbeats received"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: powertrack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Farcy
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-
|
13
|
+
date: 2016-07-05 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|