powertrack 1.0.3 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/Gemfile.lock +1 -1
- data/History.txt +9 -4
- data/README.md +15 -2
- data/TODO.md +1 -0
- data/lib/powertrack/streaming/retrier.rb +1 -1
- data/lib/powertrack/streaming/stream.rb +53 -18
- data/lib/powertrack/version.rb +1 -1
- data/test/minitest_helper.rb +3 -2
- data/test/test_manage_rules.rb +9 -1
- data/test/test_track_stream.rb +42 -12
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NjZjMjc0OWE1ODU4NTUyNTY2Zjk1YzdiMjYxZDgxMjYwMTI0MmQyMg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MmEzNDQ0YWYzZGYwODcwNDM0MDU5MDc1YjdlZTIwMTRjMmRhNzZlYQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
NTU5ZmE3MzI3NzUzYTYxYzgyNGZiYzBkYjllODdiMTA5M2YyYWIyN2Y3M2E2
|
10
|
+
M2ZhNDRmZWFlNTE1ZTI4YTA0OGZiNDg4NDFiYWQwOWRiNGMwNjhlMTIzZWRi
|
11
|
+
NGNhNzFiZTIyM2Q3Y2I4OTUzYmQ0ZmRmYjQ4ODRiODhkMjUzMzg=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YzQxZjcxMDVmYTQzZmRlODNjMWIyNmNlNDczNWUyMTQzNzM5MmRlZDk2ZDJh
|
14
|
+
ZDhkNmU3MzRmMTFiZjBlM2RiZjYzZmQ4MzQ1MjI1YThjZGVmYjdjMWZjZTI0
|
15
|
+
OGUxYzYwY2JlY2Q3NDMwOWQ1N2NlMjg1MDI1ZjU2NzU5NGQzOGE=
|
data/Gemfile.lock
CHANGED
data/History.txt
CHANGED
@@ -1,10 +1,15 @@
|
|
1
|
-
1.0
|
2
|
-
|
1
|
+
v1.1.0
|
2
|
+
------
|
3
|
+
|
4
|
+
* Add support for Replay feature
|
5
|
+
|
6
|
+
v1.0.3
|
7
|
+
------
|
3
8
|
|
4
9
|
* Retrieve the on_system function from options
|
5
10
|
|
6
|
-
|
7
|
-
|
11
|
+
v1.0.2
|
12
|
+
------
|
8
13
|
|
9
14
|
* to_json can get options in order for powertrack to support the standard
|
10
15
|
JSON library (through the MultiJson gem). Thanks to @duncanita for the PR.
|
data/README.md
CHANGED
@@ -90,8 +90,8 @@ The ```:stop_timeout``` may be fine-tuned when passing options to the tracker.
|
|
90
90
|
|
91
91
|
As highly recommended by GNIP, the PowerTrack::Stream client manages an exponential
|
92
92
|
backoff retry mechanism when a disconnection happens. The reconnections can be
|
93
|
-
fine-tuned through the
|
94
|
-
```track``` call.
|
93
|
+
fine-tuned through the ```:max_retries``` and ```:backoff``` options passed to
|
94
|
+
the ```track``` call.
|
95
95
|
|
96
96
|
## Backfill
|
97
97
|
|
@@ -102,6 +102,19 @@ last 5 minutes when reconnecting.
|
|
102
102
|
Provide a (numerical) client id as the last (but optional) argument of the
|
103
103
|
PowerTrack::Stream constructor to enable this feature.
|
104
104
|
|
105
|
+
## Replay
|
106
|
+
|
107
|
+
Replay is a feature provided by GNIP to recover lost activities over the last
|
108
|
+
5 days. The Replay stream lives alongside the realtime stream and is activated
|
109
|
+
by setting the ```:replay``` option to true when building a ```PowerTrack::Stream```
|
110
|
+
object.
|
111
|
+
|
112
|
+
Once Replay is activated, you use the stream as previously, starting by
|
113
|
+
configuring some rules that define which activities you will recover. Once done,
|
114
|
+
you can track the stream by specifying a timeframe with the ```:from```
|
115
|
+
and ```:to``` options. By default, replay happens over 30 minutes, starting 1
|
116
|
+
hour ago.
|
117
|
+
|
105
118
|
## Errors
|
106
119
|
|
107
120
|
All the errors that come from PowerTrack are defined through an ad-hoc exception
|
data/TODO.md
CHANGED
@@ -78,5 +78,6 @@ See [Managing disconnections](http://support.gnip.com/articles/disconnections-ex
|
|
78
78
|
## Other features
|
79
79
|
|
80
80
|
* _[DONE]_ Support test and development streams
|
81
|
+
* _[DONE]_ Support Replay mode (5-days back history)
|
81
82
|
* Support status dashboard
|
82
83
|
* Support Historical Powertrack
|
@@ -18,7 +18,7 @@ module PowerTrack
|
|
18
18
|
include VoidLogger::LoggerMixin
|
19
19
|
|
20
20
|
# The format of the URLs to connect to the various stream services
|
21
|
-
FEATURE_URL_FORMAT = "https://%s:%s/accounts/%s/publishers/%s/
|
21
|
+
FEATURE_URL_FORMAT = "https://%s:%s/accounts/%s/publishers/%s/%s/track/%s%s.json".freeze
|
22
22
|
|
23
23
|
# The default timeout on a connection to PowerTrack. Can be overridden per call.
|
24
24
|
DEFAULT_CONNECTION_TIMEOUT = 30
|
@@ -32,16 +32,22 @@ module PowerTrack
|
|
32
32
|
connect_timeout: DEFAULT_CONNECTION_TIMEOUT,
|
33
33
|
inactivity_timeout: DEFAULT_INACTIVITY_TIMEOUT,
|
34
34
|
# use a client id if you want to leverage the Backfill feature
|
35
|
-
client_id: nil
|
35
|
+
client_id: nil,
|
36
|
+
# enable the replay mode to get activities over the last 5 days
|
37
|
+
# see http://support.gnip.com/apis/replay/api_reference.html
|
38
|
+
replay: false
|
36
39
|
}
|
37
40
|
|
38
41
|
DEFAULT_OK_RESPONSE_STATUS = 200
|
39
42
|
|
40
|
-
#
|
43
|
+
# The patterns used to identify the various types of message received from GNIP
|
41
44
|
# everything else is an activity
|
42
45
|
HEARTBEAT_MESSAGE_PATTERN = /\A\s*\z/
|
43
46
|
SYSTEM_MESSAGE_PATTERN = /\A\s*\{\s*"(info|warn|error)":/mi
|
44
47
|
|
48
|
+
# The format used to send UTC timestamps in Replay mode
|
49
|
+
REPLAY_TIMESTAMP_FORMAT = '%Y%m%d%H%M'
|
50
|
+
|
45
51
|
attr_reader :username, :account_name, :data_source, :label
|
46
52
|
|
47
53
|
def initialize(username, password, account_name, data_source, label, options=nil)
|
@@ -52,6 +58,8 @@ module PowerTrack
|
|
52
58
|
@label = label
|
53
59
|
@options = DEFAULT_STREAM_OPTIONS.merge(options || {})
|
54
60
|
@client_id = @options[:client_id]
|
61
|
+
@replay = !!@options[:replay]
|
62
|
+
@stream_mode = @replay ? 'replay' : 'streams'
|
55
63
|
end
|
56
64
|
|
57
65
|
# Adds many rules to your PowerTrack stream’s ruleset.
|
@@ -105,7 +113,7 @@ module PowerTrack
|
|
105
113
|
# receive GZip-compressed payloads ?
|
106
114
|
compressed: true,
|
107
115
|
# max number of retries after a disconnection
|
108
|
-
max_retries:
|
116
|
+
max_retries: 2,
|
109
117
|
# advanced options to configure exponential backoff used for retries
|
110
118
|
backoff: nil,
|
111
119
|
# max number of seconds to wait for last message handlers to complete
|
@@ -113,6 +121,10 @@ module PowerTrack
|
|
113
121
|
# pass message in raw form (JSON formatted string) instead of JSON-decoded
|
114
122
|
# Ruby objects to message handlers
|
115
123
|
raw: false,
|
124
|
+
# the starting date from which the activities will be recovered (replay mode only)
|
125
|
+
from: nil,
|
126
|
+
# the ending date to which the activities will be recovered (replay mode only)
|
127
|
+
to: nil,
|
116
128
|
# called for each message received, except heartbeats
|
117
129
|
on_message: nil,
|
118
130
|
# called for each activity received
|
@@ -158,6 +170,7 @@ module PowerTrack
|
|
158
170
|
gnip_server_port,
|
159
171
|
@account_name,
|
160
172
|
@data_source,
|
173
|
+
@stream_mode,
|
161
174
|
@label,
|
162
175
|
feature ]
|
163
176
|
|
@@ -187,6 +200,7 @@ module PowerTrack
|
|
187
200
|
# Opens a new connection to GNIP PowerTrack.
|
188
201
|
def connect(hostname, feature=nil)
|
189
202
|
url = feature_url(hostname, feature)
|
203
|
+
logger.debug("Connecting to '#{url}' with headers #{connection_headers}...")
|
190
204
|
EventMachine::HttpRequest.new(url, connection_headers)
|
191
205
|
end
|
192
206
|
|
@@ -283,14 +297,14 @@ module PowerTrack
|
|
283
297
|
handle_api_response(resp_status, resp_error, resp_body, options[:ok])
|
284
298
|
end
|
285
299
|
|
286
|
-
# Returns the type of message received on the stream,
|
287
|
-
#
|
300
|
+
# Returns the type of message received on the stream, together with a
|
301
|
+
# level indicator in case of a system message, nil otherwise.
|
288
302
|
def message_type(message)
|
289
303
|
case message
|
290
|
-
when HEARTBEAT_MESSAGE_PATTERN then :heartbeat
|
291
|
-
when SYSTEM_MESSAGE_PATTERN then :system
|
304
|
+
when HEARTBEAT_MESSAGE_PATTERN then [ :heartbeat, nil ]
|
305
|
+
when SYSTEM_MESSAGE_PATTERN then [ :system, $1.downcase.to_sym ]
|
292
306
|
else
|
293
|
-
:activity
|
307
|
+
[ :activity, nil ]
|
294
308
|
end
|
295
309
|
end
|
296
310
|
|
@@ -322,7 +336,25 @@ module PowerTrack
|
|
322
336
|
EM.run do
|
323
337
|
logger.info "Starting the reactor..."
|
324
338
|
con = connect('stream')
|
325
|
-
|
339
|
+
get_opts = { head: track_req_headers(options[:compressed]) }
|
340
|
+
|
341
|
+
# add a timeframe in replay mode
|
342
|
+
if @replay
|
343
|
+
now = Time.now
|
344
|
+
# start 1 hour ago by default
|
345
|
+
from = options[:from] || (now - 60*60)
|
346
|
+
# stop 30 minutes ago by default
|
347
|
+
to = options[:to] || (now - 30*60)
|
348
|
+
|
349
|
+
get_opts[:query] = {
|
350
|
+
'fromDate' => from.utc.strftime(REPLAY_TIMESTAMP_FORMAT),
|
351
|
+
'toDate' => to.utc.strftime(REPLAY_TIMESTAMP_FORMAT)
|
352
|
+
}
|
353
|
+
|
354
|
+
logger.info "Replay mode enabled from '#{from}' to '#{to}'"
|
355
|
+
end
|
356
|
+
|
357
|
+
http = con.get(get_opts)
|
326
358
|
|
327
359
|
# polls to see if the connection should be closed
|
328
360
|
close_watcher = EM.add_periodic_timer(1) do
|
@@ -352,19 +384,21 @@ module PowerTrack
|
|
352
384
|
next
|
353
385
|
end
|
354
386
|
|
355
|
-
# reset retries when some (valid) data are received
|
356
|
-
if retrier.retrying?
|
357
|
-
logger.info "Resetting retries..."
|
358
|
-
retrier.reset!
|
359
|
-
end
|
360
|
-
|
361
387
|
# process the chunk
|
362
388
|
buffer.process(chunk) do |raw|
|
363
389
|
logger.debug "New message received"
|
390
|
+
|
391
|
+
# get the message type and its (optional) level
|
392
|
+
m_type, m_level = message_type(raw)
|
393
|
+
|
394
|
+
# reset retries when some (valid) data are received
|
395
|
+
if retrier.retrying? && m_level != :error
|
396
|
+
logger.info "Resetting retries..."
|
397
|
+
retrier.reset!
|
398
|
+
end
|
399
|
+
|
364
400
|
EM.defer do
|
365
401
|
# select the right message handler(s) according to the message type
|
366
|
-
m_type = message_type(raw)
|
367
|
-
|
368
402
|
if m_type == :heartbeat
|
369
403
|
on_heartbeat.call if on_heartbeat
|
370
404
|
else
|
@@ -401,6 +435,7 @@ module PowerTrack
|
|
401
435
|
resp_status = http_client.response_header.status || DEFAULT_OK_RESPONSE_STATUS
|
402
436
|
resp_error = http_client.error
|
403
437
|
resp_body = http_client.response
|
438
|
+
|
404
439
|
wait_til_defers_finish_and_stop(stop_timeout)
|
405
440
|
end
|
406
441
|
end
|
data/lib/powertrack/version.rb
CHANGED
data/test/minitest_helper.rb
CHANGED
@@ -30,12 +30,13 @@ class Minitest::Test
|
|
30
30
|
end
|
31
31
|
|
32
32
|
# Returns a brand-new stream based on the config found in test/powertrack.yml.
|
33
|
-
def new_stream
|
33
|
+
def new_stream(replay=false)
|
34
34
|
PowerTrack::Stream.new(
|
35
35
|
powertrack_config[:username],
|
36
36
|
powertrack_config[:password],
|
37
37
|
powertrack_config[:account_name],
|
38
38
|
powertrack_config[:data_source],
|
39
|
-
powertrack_config[:stream_label]
|
39
|
+
replay ? 'prod' : powertrack_config[:stream_label],
|
40
|
+
replay: replay)
|
40
41
|
end
|
41
42
|
end
|
data/test/test_manage_rules.rb
CHANGED
@@ -5,7 +5,15 @@ require 'multi_json'
|
|
5
5
|
class TestManageRules < Minitest::Test
|
6
6
|
|
7
7
|
def test_add_then_delete_a_single_rule
|
8
|
-
|
8
|
+
add_then_delete_a_single_rule(false)
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_add_then_delete_a_single_rule_in_replay_mode
|
12
|
+
add_then_delete_a_single_rule(true)
|
13
|
+
end
|
14
|
+
|
15
|
+
def add_then_delete_a_single_rule(replay)
|
16
|
+
stream = new_stream(replay)
|
9
17
|
|
10
18
|
rule = PowerTrack::Rule.new('coke')
|
11
19
|
assert rule.valid?
|
data/test/test_track_stream.rb
CHANGED
@@ -4,8 +4,16 @@ require 'multi_json'
|
|
4
4
|
|
5
5
|
class TestTrackStream < Minitest::Test
|
6
6
|
|
7
|
-
def
|
8
|
-
|
7
|
+
def test_track_realtime_stream
|
8
|
+
track_simple_stream(false)
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_track_replay_stream
|
12
|
+
track_simple_stream(true)
|
13
|
+
end
|
14
|
+
|
15
|
+
def track_simple_stream(replay)
|
16
|
+
stream = new_stream(replay)
|
9
17
|
|
10
18
|
# add a logger
|
11
19
|
stream.logger = Logger.new(STDERR)
|
@@ -23,6 +31,8 @@ class TestTrackStream < Minitest::Test
|
|
23
31
|
received = 0
|
24
32
|
tweeted = 0
|
25
33
|
closed = false
|
34
|
+
from = nil
|
35
|
+
to = nil
|
26
36
|
|
27
37
|
# ready to track
|
28
38
|
on_message = lambda do |message|
|
@@ -34,28 +44,48 @@ class TestTrackStream < Minitest::Test
|
|
34
44
|
on_activity = lambda do |tweet|
|
35
45
|
tweeted += 1
|
36
46
|
end
|
47
|
+
on_system = lambda do |message|
|
48
|
+
$stderr.puts message.inspect
|
49
|
+
end
|
37
50
|
|
38
51
|
close_now = lambda { closed }
|
39
52
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
53
|
+
if replay
|
54
|
+
now = Time.now
|
55
|
+
from = now - 31*60
|
56
|
+
to = now - 30*60
|
57
|
+
delay = to - from
|
58
|
+
else
|
59
|
+
delay = 60
|
60
|
+
Thread.new do
|
61
|
+
$stderr.puts "Time-bomb thread running for #{delay} seconds..."
|
62
|
+
sleep delay
|
63
|
+
$stderr.puts "Time to shut down !"
|
64
|
+
closed = true
|
65
|
+
end
|
46
66
|
end
|
47
67
|
|
48
68
|
started_at = Time.now
|
49
69
|
res = stream.track(on_message: on_message,
|
50
70
|
on_heartbeat: on_heartbeat,
|
51
71
|
on_activity: on_activity,
|
72
|
+
on_system: on_system,
|
52
73
|
close_now: close_now,
|
53
|
-
max_retries:
|
54
|
-
fake_disconnections: 20
|
74
|
+
max_retries: replay ? 0 : 2,
|
75
|
+
fake_disconnections: replay ? nil : 20,
|
76
|
+
from: from,
|
77
|
+
to: to)
|
78
|
+
|
79
|
+
ended_at = Time.now
|
55
80
|
|
56
81
|
assert_nil res
|
57
|
-
assert closed, 'Stream not closed'
|
58
|
-
|
82
|
+
assert replay || closed, 'Stream not closed'
|
83
|
+
|
84
|
+
if replay
|
85
|
+
assert (ended_at - started_at) <= delay
|
86
|
+
else
|
87
|
+
assert (ended_at - started_at) >= delay
|
88
|
+
end
|
59
89
|
|
60
90
|
assert heartbeats > 0, 'No heartbeat received'
|
61
91
|
puts "#{heartbeats} heartbeats received"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: powertrack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Farcy
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-
|
13
|
+
date: 2016-07-05 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|