powertrack 1.0.3 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZjgwZmUwMGNlNDJmMWFkZmQ2NWI1Y2I5NTJmZDIyMmVkODliZDI5YQ==
4
+ NjZjMjc0OWE1ODU4NTUyNTY2Zjk1YzdiMjYxZDgxMjYwMTI0MmQyMg==
5
5
  data.tar.gz: !binary |-
6
- NzkxMWY4YzRjZjY4NTkwMjljZDBlY2YzNTQxYzEwNWM0YWIxMTE4ZQ==
6
+ MmEzNDQ0YWYzZGYwODcwNDM0MDU5MDc1YjdlZTIwMTRjMmRhNzZlYQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- OTA4YTE2OTAwNmUwZmQyYzNhODM3MTI3Y2ViMzYyODhmNmY3ZThkYTQ4ZTYx
10
- MTkwNjYyMzZhNmY1NjRhZmY3YjU2YmE2YjMwZjBiNGQ3M2RkODEyNmM4OTY5
11
- OGI1NmJhOTZhZjAyODQxMmM5NzM2ZTJkZWNmOTg4NDcyMDQxZGQ=
9
+ NTU5ZmE3MzI3NzUzYTYxYzgyNGZiYzBkYjllODdiMTA5M2YyYWIyN2Y3M2E2
10
+ M2ZhNDRmZWFlNTE1ZTI4YTA0OGZiNDg4NDFiYWQwOWRiNGMwNjhlMTIzZWRi
11
+ NGNhNzFiZTIyM2Q3Y2I4OTUzYmQ0ZmRmYjQ4ODRiODhkMjUzMzg=
12
12
  data.tar.gz: !binary |-
13
- MmRiZmFiOWM3NjI4NWMzYWE5NDA1MDU2YjZjMjIzNGMwMjhmYzcwZDc5Yzcw
14
- Mjc0MDM4YmQ4ZGUxMTI4OTZmMjM0YzNiMDVhZjExNTMwNWQzODI0YzNjODNm
15
- N2Q4YzE4MDAxMWM4YjAzNTc1YTI5NTgxOGJjZTZhNDQyNThjMGI=
13
+ YzQxZjcxMDVmYTQzZmRlODNjMWIyNmNlNDczNWUyMTQzNzM5MmRlZDk2ZDJh
14
+ ZDhkNmU3MzRmMTFiZjBlM2RiZjYzZmQ4MzQ1MjI1YThjZGVmYjdjMWZjZTI0
15
+ OGUxYzYwY2JlY2Q3NDMwOWQ1N2NlMjg1MDI1ZjU2NzU5NGQzOGE=
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- powertrack (1.0.3)
4
+ powertrack (1.1.0)
5
5
  em-http-request (~> 1.1)
6
6
  eventmachine (~> 1.0)
7
7
  exponential-backoff (~> 0.0.2)
data/History.txt CHANGED
@@ -1,10 +1,15 @@
1
- 1.0.3
2
- -----
1
+ v1.1.0
2
+ ------
3
+
4
+ * Add support for Replay feature
5
+
6
+ v1.0.3
7
+ ------
3
8
 
4
9
  * Retrieve the on_system function from options
5
10
 
6
- 1.0.2
7
- -----
11
+ v1.0.2
12
+ ------
8
13
 
9
14
  * to_json can get options in order for powertrack to support the standard
10
15
  JSON library (through the MultiJson gem). Thanks to @duncanita for the PR.
data/README.md CHANGED
@@ -90,8 +90,8 @@ The ```:stop_timeout``` may be fine-tune when passing options to the tracker.
90
90
 
91
91
  As highly recommended by GNIP, the PowerTrack::Stream client manages an exponential
92
92
  backoff retry mechanism when a disconnection happens. The reconnections can be
93
- fine-tuned through the ```max_retries``` and ```backoff``` options passed to the
94
- ```track``` call.
93
+ fine-tuned through the ```:max_retries``` and ```:backoff``` options passed to
94
+ the ```track``` call.
95
95
 
96
96
  ## Backfill
97
97
 
@@ -102,6 +102,19 @@ last 5 minutes when reconnecting.
102
102
  Provide a (numerical) client id as the last (but optional) argument of the
103
103
  PowerTrack::Stream constructor to enable this feature.
104
104
 
105
+ ## Replay
106
+
107
+ Replay is a feature provided by GNIP to recover lost activities over the last
108
+ 5 days. The Replay stream lives alongside the realtime stream and is activated
109
+ by setting the ```:replay``` option to true when building a ```PowerTrack::Stream```
110
+ object.
111
+
112
+ Once Replay is activated, you use the stream as previously, starting by
113
+ configuring some rules that define which activities you will recover. Once done,
114
+ you can track the stream by specifying a timeframe with the ```:from```
115
+ and ```:to``` options. By default, replay happens over 30 minutes, starting 1
116
+ hour ago.
117
+
105
118
  ## Errors
106
119
 
107
120
  All the errors that come from PowerTrack are defined through an ad-hoc exception
data/TODO.md CHANGED
@@ -78,5 +78,6 @@ See [Managing disconnections](http://support.gnip.com/articles/disconnections-ex
78
78
  ## Other features
79
79
 
80
80
  * _[DONE]_ Support test and development streams
81
+ * _[DONE]_ Support Replay mode (5-days back history)
81
82
  * Support status dashboard
82
83
  * Support Historical Powertrack
@@ -56,7 +56,7 @@ module PowerTrack
56
56
  # retrier.
57
57
  def retry(&block)
58
58
  # TODO: manage exceptions
59
- while @continue && @retries < @max_retries
59
+ while @continue && @retries <= @max_retries
60
60
  res = yield
61
61
  if @continue
62
62
  @retries += 1
@@ -18,7 +18,7 @@ module PowerTrack
18
18
  include VoidLogger::LoggerMixin
19
19
 
20
20
  # The format of the URLs to connect to the various stream services
21
- FEATURE_URL_FORMAT = "https://%s:%s/accounts/%s/publishers/%s/streams/track/%s%s.json".freeze
21
+ FEATURE_URL_FORMAT = "https://%s:%s/accounts/%s/publishers/%s/%s/track/%s%s.json".freeze
22
22
 
23
23
  # The default timeout on a connection to PowerTrack. Can be overridden per call.
24
24
  DEFAULT_CONNECTION_TIMEOUT = 30
@@ -32,16 +32,22 @@ module PowerTrack
32
32
  connect_timeout: DEFAULT_CONNECTION_TIMEOUT,
33
33
  inactivity_timeout: DEFAULT_INACTIVITY_TIMEOUT,
34
34
  # use a client id if you want to leverage the Backfill feature
35
- client_id: nil
35
+ client_id: nil,
36
+ # enable the replay mode to get activities over the last 5 days
37
+ # see http://support.gnip.com/apis/replay/api_reference.html
38
+ replay: false
36
39
  }
37
40
 
38
41
  DEFAULT_OK_RESPONSE_STATUS = 200
39
42
 
40
- # the patterns used to identify the various types of message received from GNIP
43
+ # The patterns used to identify the various types of message received from GNIP
41
44
  # everything else is an activity
42
45
  HEARTBEAT_MESSAGE_PATTERN = /\A\s*\z/
43
46
  SYSTEM_MESSAGE_PATTERN = /\A\s*\{\s*"(info|warn|error)":/mi
44
47
 
48
+ # The format used to send UTC timestamps in Replay mode
49
+ REPLAY_TIMESTAMP_FORMAT = '%Y%m%d%H%M'
50
+
45
51
  attr_reader :username, :account_name, :data_source, :label
46
52
 
47
53
  def initialize(username, password, account_name, data_source, label, options=nil)
@@ -52,6 +58,8 @@ module PowerTrack
52
58
  @label = label
53
59
  @options = DEFAULT_STREAM_OPTIONS.merge(options || {})
54
60
  @client_id = @options[:client_id]
61
+ @replay = !!@options[:replay]
62
+ @stream_mode = @replay ? 'replay' : 'streams'
55
63
  end
56
64
 
57
65
  # Adds many rules to your PowerTrack stream’s ruleset.
@@ -105,7 +113,7 @@ module PowerTrack
105
113
  # receive GZip-compressed payloads ?
106
114
  compressed: true,
107
115
  # max number of retries after a disconnection
108
- max_retries: 3,
116
+ max_retries: 2,
109
117
  # advanced options to configure exponential backoff used for retries
110
118
  backoff: nil,
111
119
  # max number of seconds to wait for last message handlers to complete
@@ -113,6 +121,10 @@ module PowerTrack
113
121
  # pass message in raw form (JSON formatted string) instead of JSON-decoded
114
122
  # Ruby objects to message handlers
115
123
  raw: false,
124
+ # the starting date from which the activities will be recovered (replay mode only)
125
+ from: nil,
126
+ # the ending date to which the activities will be recovered (replay mode only)
127
+ to: nil,
116
128
  # called for each message received, except heartbeats
117
129
  on_message: nil,
118
130
  # called for each activity received
@@ -158,6 +170,7 @@ module PowerTrack
158
170
  gnip_server_port,
159
171
  @account_name,
160
172
  @data_source,
173
+ @stream_mode,
161
174
  @label,
162
175
  feature ]
163
176
 
@@ -187,6 +200,7 @@ module PowerTrack
187
200
  # Opens a new connection to GNIP PowerTrack.
188
201
  def connect(hostname, feature=nil)
189
202
  url = feature_url(hostname, feature)
203
+ logger.debug("Connecting to '#{url}' with headers #{connection_headers}...")
190
204
  EventMachine::HttpRequest.new(url, connection_headers)
191
205
  end
192
206
 
@@ -283,14 +297,14 @@ module PowerTrack
283
297
  handle_api_response(resp_status, resp_error, resp_body, options[:ok])
284
298
  end
285
299
 
286
- # Returns the type of message received on the stream, nil when the type
287
- # cannot be identified.
300
+ # Returns the type of message received on the stream, together with a
301
+ # level indicator in case of a system message, nil otherwise.
288
302
  def message_type(message)
289
303
  case message
290
- when HEARTBEAT_MESSAGE_PATTERN then :heartbeat
291
- when SYSTEM_MESSAGE_PATTERN then :system
304
+ when HEARTBEAT_MESSAGE_PATTERN then [ :heartbeat, nil ]
305
+ when SYSTEM_MESSAGE_PATTERN then [ :system, $1.downcase.to_sym ]
292
306
  else
293
- :activity
307
+ [ :activity, nil ]
294
308
  end
295
309
  end
296
310
 
@@ -322,7 +336,25 @@ module PowerTrack
322
336
  EM.run do
323
337
  logger.info "Starting the reactor..."
324
338
  con = connect('stream')
325
- http = con.get(head: track_req_headers(options[:compressed]))
339
+ get_opts = { head: track_req_headers(options[:compressed]) }
340
+
341
+ # add a timeframe in replay mode
342
+ if @replay
343
+ now = Time.now
344
+ # start 1 hour ago by default
345
+ from = options[:from] || (now - 60*60)
346
+ # stop 30 minutes ago by default
347
+ to = options[:to] || (now - 30*60)
348
+
349
+ get_opts[:query] = {
350
+ 'fromDate' => from.utc.strftime(REPLAY_TIMESTAMP_FORMAT),
351
+ 'toDate' => to.utc.strftime(REPLAY_TIMESTAMP_FORMAT)
352
+ }
353
+
354
+ logger.info "Replay mode enabled from '#{from}' to '#{to}'"
355
+ end
356
+
357
+ http = con.get(get_opts)
326
358
 
327
359
  # polls to see if the connection should be closed
328
360
  close_watcher = EM.add_periodic_timer(1) do
@@ -352,19 +384,21 @@ module PowerTrack
352
384
  next
353
385
  end
354
386
 
355
- # reset retries when some (valid) data are received
356
- if retrier.retrying?
357
- logger.info "Resetting retries..."
358
- retrier.reset!
359
- end
360
-
361
387
  # process the chunk
362
388
  buffer.process(chunk) do |raw|
363
389
  logger.debug "New message received"
390
+
391
+ # get the message type and its (optional) level
392
+ m_type, m_level = message_type(raw)
393
+
394
+ # reset retries when some (valid) data are received
395
+ if retrier.retrying? && m_level != :error
396
+ logger.info "Resetting retries..."
397
+ retrier.reset!
398
+ end
399
+
364
400
  EM.defer do
365
401
  # select the right message handler(s) according to the message type
366
- m_type = message_type(raw)
367
-
368
402
  if m_type == :heartbeat
369
403
  on_heartbeat.call if on_heartbeat
370
404
  else
@@ -401,6 +435,7 @@ module PowerTrack
401
435
  resp_status = http_client.response_header.status || DEFAULT_OK_RESPONSE_STATUS
402
436
  resp_error = http_client.error
403
437
  resp_body = http_client.response
438
+
404
439
  wait_til_defers_finish_and_stop(stop_timeout)
405
440
  end
406
441
  end
@@ -1,3 +1,3 @@
1
1
  module PowerTrack
2
- VERSION = '1.0.3'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -30,12 +30,13 @@ class Minitest::Test
30
30
  end
31
31
 
32
32
  # Returns a brand-new stream based on the config found in test/powertrack.yml.
33
- def new_stream
33
+ def new_stream(replay=false)
34
34
  PowerTrack::Stream.new(
35
35
  powertrack_config[:username],
36
36
  powertrack_config[:password],
37
37
  powertrack_config[:account_name],
38
38
  powertrack_config[:data_source],
39
- powertrack_config[:stream_label])
39
+ replay ? 'prod' : powertrack_config[:stream_label],
40
+ replay: replay)
40
41
  end
41
42
  end
@@ -5,7 +5,15 @@ require 'multi_json'
5
5
  class TestManageRules < Minitest::Test
6
6
 
7
7
  def test_add_then_delete_a_single_rule
8
- stream = new_stream
8
+ add_then_delete_a_single_rule(false)
9
+ end
10
+
11
+ def test_add_then_delete_a_single_rule_in_replay_mode
12
+ add_then_delete_a_single_rule(true)
13
+ end
14
+
15
+ def add_then_delete_a_single_rule(replay)
16
+ stream = new_stream(replay)
9
17
 
10
18
  rule = PowerTrack::Rule.new('coke')
11
19
  assert rule.valid?
@@ -4,8 +4,16 @@ require 'multi_json'
4
4
 
5
5
  class TestTrackStream < Minitest::Test
6
6
 
7
- def test_track_simple_stream
8
- stream = new_stream
7
+ def test_track_realtime_stream
8
+ track_simple_stream(false)
9
+ end
10
+
11
+ def test_track_replay_stream
12
+ track_simple_stream(true)
13
+ end
14
+
15
+ def track_simple_stream(replay)
16
+ stream = new_stream(replay)
9
17
 
10
18
  # add a logger
11
19
  stream.logger = Logger.new(STDERR)
@@ -23,6 +31,8 @@ class TestTrackStream < Minitest::Test
23
31
  received = 0
24
32
  tweeted = 0
25
33
  closed = false
34
+ from = nil
35
+ to = nil
26
36
 
27
37
  # ready to track
28
38
  on_message = lambda do |message|
@@ -34,28 +44,48 @@ class TestTrackStream < Minitest::Test
34
44
  on_activity = lambda do |tweet|
35
45
  tweeted += 1
36
46
  end
47
+ on_system = lambda do |message|
48
+ $stderr.puts message.inspect
49
+ end
37
50
 
38
51
  close_now = lambda { closed }
39
52
 
40
- delay = 60
41
- Thread.new do
42
- $stderr.puts "Time-bomb thread running for #{delay} seconds..."
43
- sleep delay
44
- $stderr.puts "Time to shut down !"
45
- closed = true
53
+ if replay
54
+ now = Time.now
55
+ from = now - 31*60
56
+ to = now - 30*60
57
+ delay = to - from
58
+ else
59
+ delay = 60
60
+ Thread.new do
61
+ $stderr.puts "Time-bomb thread running for #{delay} seconds..."
62
+ sleep delay
63
+ $stderr.puts "Time to shut down !"
64
+ closed = true
65
+ end
46
66
  end
47
67
 
48
68
  started_at = Time.now
49
69
  res = stream.track(on_message: on_message,
50
70
  on_heartbeat: on_heartbeat,
51
71
  on_activity: on_activity,
72
+ on_system: on_system,
52
73
  close_now: close_now,
53
- max_retries: 3,
54
- fake_disconnections: 20)
74
+ max_retries: replay ? 0 : 2,
75
+ fake_disconnections: replay ? nil : 20,
76
+ from: from,
77
+ to: to)
78
+
79
+ ended_at = Time.now
55
80
 
56
81
  assert_nil res
57
- assert closed, 'Stream not closed'
58
- assert Time.now - started_at >= delay
82
+ assert replay || closed, 'Stream not closed'
83
+
84
+ if replay
85
+ assert (ended_at - started_at) <= delay
86
+ else
87
+ assert (ended_at - started_at) >= delay
88
+ end
59
89
 
60
90
  assert heartbeats > 0, 'No heartbeat received'
61
91
  puts "#{heartbeats} heartbeats received"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: powertrack
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Laurent Farcy
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-02-22 00:00:00.000000000 Z
13
+ date: 2016-07-05 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: bundler