powertrack 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +41 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +43 -0
- data/LICENSE.md +25 -0
- data/README.md +117 -0
- data/Rakefile +16 -0
- data/TODO.md +82 -0
- data/lib/core_ext/class.rb +7 -0
- data/lib/powertrack.rb +10 -0
- data/lib/powertrack/errors.rb +110 -0
- data/lib/powertrack/rules/rule.rb +140 -0
- data/lib/powertrack/rules/string_extension.rb +9 -0
- data/lib/powertrack/streaming/api.rb +64 -0
- data/lib/powertrack/streaming/data_buffer.rb +36 -0
- data/lib/powertrack/streaming/retrier.rb +70 -0
- data/lib/powertrack/streaming/stream.rb +429 -0
- data/lib/powertrack/version.rb +3 -0
- data/powertrack.gemspec +32 -0
- data/test/minitest_helper.rb +41 -0
- data/test/test_manage_rules.rb +30 -0
- data/test/test_rule.rb +163 -0
- data/test/test_track_stream.rb +72 -0
- metadata +202 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OGNhMTRiZDk2MDJlN2VjNGE3Zjk1MmVmMTVjNjJiODQxYThkNDdkNg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MjVjZTk3NjZhOWMxNTFkMTE4NjQ0MmM5Y2JjOTY0ZDE1MzZkYTUwYg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ODk2NGZlOWY5ZTM3N2MzYWZlYzQwNWUzNjhjNmYyYjBhMzUxODMxYzhiNjMz
|
10
|
+
YzNhOTE3NWQ5ZTE2N2M0YTZmOTNkY2M4NmRkMWJjYjM0Yjc2YzQ0OWIzNGI5
|
11
|
+
MGI4ZTg0YzdmMDUzZTYwYjc3MDAwN2FkZmY3ZWQyN2ZjYzhhODc=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZjIzZGY3ZWZkNTA0MTJhZGQwZjNjN2FiNTFiZTQ3MmI2NTIzNWEyZGVkNzFj
|
14
|
+
YzczZWY5NTU0MDczNjQyZTA5YzMzMzE1ZmViODkyMDk2M2RjNGU4MTZhMGE2
|
15
|
+
M2VkODJjNzUwY2M3ZTBmOWUwMDM4NTZkYjE0NjFhZGI1YTFhMDI=
|
data/.gitignore
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/test/tmp/
|
9
|
+
/test/version_tmp/
|
10
|
+
/tmp/
|
11
|
+
|
12
|
+
## Specific to RubyMotion:
|
13
|
+
.dat*
|
14
|
+
.repl_history
|
15
|
+
build/
|
16
|
+
|
17
|
+
## Documentation cache and generated files:
|
18
|
+
/.yardoc/
|
19
|
+
/_yardoc/
|
20
|
+
/doc/
|
21
|
+
/rdoc/
|
22
|
+
|
23
|
+
## Environment normalisation:
|
24
|
+
/.bundle/
|
25
|
+
/vendor/bundle
|
26
|
+
/lib/bundler/man/
|
27
|
+
|
28
|
+
# for a library or gem, you might want to ignore these files since the code is
|
29
|
+
# intended to run in multiple environments; otherwise, check them in:
|
30
|
+
# Gemfile.lock
|
31
|
+
# .ruby-version
|
32
|
+
# .ruby-gemset
|
33
|
+
|
34
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
35
|
+
.rvmrc
|
36
|
+
|
37
|
+
# ignore Eclipse config
|
38
|
+
.project
|
39
|
+
|
40
|
+
# ignore tests-related config file
|
41
|
+
test/powertrack.yml
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
powertrack (1.0.0)
|
5
|
+
em-http-request (~> 1.1)
|
6
|
+
eventmachine (~> 1.0)
|
7
|
+
exponential-backoff (~> 0.0.2)
|
8
|
+
multi_json (~> 1.11)
|
9
|
+
void_logger (~> 0.1)
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
addressable (2.3.8)
|
15
|
+
cookiejar (0.3.2)
|
16
|
+
em-http-request (1.1.2)
|
17
|
+
addressable (>= 2.3.4)
|
18
|
+
cookiejar
|
19
|
+
em-socksify (>= 0.3)
|
20
|
+
eventmachine (>= 1.0.3)
|
21
|
+
http_parser.rb (>= 0.6.0)
|
22
|
+
em-socksify (0.3.0)
|
23
|
+
eventmachine (>= 1.0.0.beta.4)
|
24
|
+
eventmachine (1.0.7)
|
25
|
+
exponential-backoff (0.0.2)
|
26
|
+
http_parser.rb (0.6.0)
|
27
|
+
minitest (5.7.0)
|
28
|
+
multi_json (1.11.2)
|
29
|
+
rake (10.4.2)
|
30
|
+
ruby-prof (0.15.8)
|
31
|
+
void_logger (0.1)
|
32
|
+
yajl-ruby (1.2.1)
|
33
|
+
|
34
|
+
PLATFORMS
|
35
|
+
ruby
|
36
|
+
|
37
|
+
DEPENDENCIES
|
38
|
+
bundler (~> 1.7)
|
39
|
+
minitest (~> 5.5)
|
40
|
+
powertrack!
|
41
|
+
rake (~> 10.3)
|
42
|
+
ruby-prof (~> 0.15)
|
43
|
+
yajl-ruby (~> 1.0)
|
data/LICENSE.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
## The MIT License (MIT)
|
2
|
+
|
3
|
+
- Copyright (c) 2015 eCairn Inc.
|
4
|
+
- Copyright (c) 2012-2015 Eric Wendelin
|
5
|
+
- Copyright (c) 2011-2015 Ryan Weald / Sharethrough
|
6
|
+
|
7
|
+
```
|
8
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
9
|
+
of this software and associated documentation files (the "Software"), to deal
|
10
|
+
in the Software without restriction, including without limitation the rights
|
11
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
12
|
+
copies of the Software, and to permit persons to whom the Software is
|
13
|
+
furnished to do so, subject to the following conditions:
|
14
|
+
|
15
|
+
The above copyright notice and this permission notice shall be included in all
|
16
|
+
copies or substantial portions of the Software.
|
17
|
+
|
18
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
19
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
20
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
21
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
22
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
23
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
24
|
+
SOFTWARE.
|
25
|
+
```
|
data/README.md
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# powertrack-rb
|
2
|
+
A Ruby gem for building GNIP PowerTrack streaming clients.
|
3
|
+
|
4
|
+
## How to use it?
|
5
|
+
|
6
|
+
1. Create a PowerTrack stream based on your credentials
|
7
|
+
|
8
|
+
```ruby
|
9
|
+
require 'powertrack'
|
10
|
+
|
11
|
+
stream = PowerTrack::Stream.new(
|
12
|
+
powertrack_config[:username],
|
13
|
+
powertrack_config[:password],
|
14
|
+
powertrack_config[:account_name],
|
15
|
+
powertrack_config[:data_source], # often 'twitter'
|
16
|
+
powertrack_config[:stream_label]) # often 'prod'
|
17
|
+
```
|
18
|
+
|
19
|
+
2. Add a few rules to the stream
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
rule = PowerTrack::Rule.new('coke')
|
23
|
+
if rule.valid?
|
24
|
+
stream.add_rule(rule)
|
25
|
+
# double-check that the rule was actually added
|
26
|
+
raise 'Fail to add a rule' unless stream.list_rules.include?(rule)
|
27
|
+
end
|
28
|
+
```
|
29
|
+
|
30
|
+
3. Get the activities out of the stream
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
received, heartbeats = 0, 0
|
34
|
+
activities = []
|
35
|
+
|
36
|
+
## defining callbacks on messages received
|
37
|
+
# callback triggered for each message received
|
38
|
+
on_message = lambda { |message| received += 1 }
|
39
|
+
# callback triggered for each heartbeat received
|
40
|
+
on_heartbeat = lambda { heartbeats += 1 }
|
41
|
+
# callback triggered for each activity received
|
42
|
+
on_activity = lambda { |activity| activities << activity }
|
43
|
+
|
44
|
+
## defining the block that will command the stop of the tracking
|
45
|
+
closed = false
|
46
|
+
close_now = lambda { closed }
|
47
|
+
|
48
|
+
delay = 60
|
49
|
+
Thread.new do
|
50
|
+
$stderr.puts "Time-bomb thread running for #{delay} seconds..."
|
51
|
+
sleep delay
|
52
|
+
$stderr.puts "Time to shut down !"
|
53
|
+
closed = true
|
54
|
+
end
|
55
|
+
|
56
|
+
started_at = Time.now
|
57
|
+
res = stream.track(on_message: on_message,
|
58
|
+
on_heartbeat: on_heartbeat,
|
59
|
+
on_activity: on_activity,
|
60
|
+
close_now: close_now)
|
61
|
+
|
62
|
+
puts "After #{delay} seconds tracking '#{rule.value}':"
|
63
|
+
puts " o #{received} messages received"
|
64
|
+
puts " o #{heartbeats} heartbeats received"
|
65
|
+
puts " o #{activities.size} activities captured"
|
66
|
+
```
|
67
|
+
|
68
|
+
Please note that each message callback must be thread-safe since it can be called
|
69
|
+
multiple times simultaneously.
|
70
|
+
|
71
|
+
## Tracking response format
|
72
|
+
|
73
|
+
By default, messages received are passed to callbacks as plain Ruby objects. Enable
|
74
|
+
the ```raw``` option to get the raw JSON-formatted string and do the parsing
|
75
|
+
yourself.
|
76
|
+
|
77
|
+
## Stop tracking
|
78
|
+
|
79
|
+
The tracker calls the ```close_now``` block each second and stops whenever the call
|
80
|
+
returns true. The stop procedure includes an additional timeframe where the tracker
|
81
|
+
waits for each pending message to be completely processed.
|
82
|
+
|
83
|
+
It's the developer's responsibility to complete message processing as soon as
|
84
|
+
possible. After 10 seconds (by default), the stop will be forced and a few messages
|
85
|
+
already received but not processed yet may be lost.
|
86
|
+
|
87
|
+
The ```:stop_timeout``` option may be fine-tuned when passing options to the tracker.
|
88
|
+
|
89
|
+
## Disconnections and Retries
|
90
|
+
|
91
|
+
As highly recommended by GNIP, the PowerTrack::Stream client manages an exponential
|
92
|
+
backoff retry mechanism when a disconnection happens. The reconnections can be
|
93
|
+
fine-tuned through the ```max_retries``` and ```backoff``` options passed to the
|
94
|
+
```track``` call.
|
95
|
+
|
96
|
+
## Backfill
|
97
|
+
|
98
|
+
Backfill is a feature provided by GNIP to avoid losing activities when being
|
99
|
+
disconnected. It automatically resends the messages sent on the stream for the
|
100
|
+
last 5 minutes when reconnecting.
|
101
|
+
|
102
|
+
Provide a (numerical) client id as the last (but optional) argument of the
|
103
|
+
PowerTrack::Stream constructor to enable this feature.
|
104
|
+
|
105
|
+
## Errors
|
106
|
+
|
107
|
+
All the errors that come from PowerTrack are defined through an ad-hoc exception
|
108
|
+
class hierarchy. See ```lib/powertrack/errors.rb```.
|
109
|
+
|
110
|
+
## Credits
|
111
|
+
|
112
|
+
The ```powertrack``` gem heavily relies on *EventMachine* and the *em-http-request*
|
113
|
+
companion gem. It was also inspired by a few other gems:
|
114
|
+
|
115
|
+
* The [gnip-rule](https://github.com/singlebrook/gnip-rule) gem
|
116
|
+
* The [gnip-stream](https://github.com/rweald/gnip-stream) gem
|
117
|
+
* The [exponential-backoff](https://github.com/pawelpacana/exponential-backoff) gem
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Rake tasks for the gem: packaging (through Bundler), tests and RDoc.
require 'bundler/gem_tasks'
require 'rake/testtask'
require 'rdoc/task'

# Test task named :test, with the 'test' directory added to the load path.
Rake::TestTask.new(:test) { |t| t.libs << 'test' }

# RDoc generation, kept under the :doc namespace.
namespace :doc do
  RDoc::Task.new do |rd|
    rd.rdoc_dir = 'doc'
    rd.rdoc_files.include('lib/**/*.rb')
  end
end

# Running plain `rake` runs the tests.
task default: :test
|
data/TODO.md
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
## General
|
2
|
+
|
3
|
+
* _[DONE]_ Rely upon MultiJson for JSON encoding and decoding
|
4
|
+
* Support thread-safe streams
|
5
|
+
|
6
|
+
A single stream used in several threads to perform several simultaneous actions
|
7
|
+
like consuming the stream while changing its rules.
|
8
|
+
|
9
|
+
It's currently impossible due to EventMachine. A transition to Celluloid::IO and
|
10
|
+
http.rb is required to be thread-friendly.
|
11
|
+
|
12
|
+
## Rules
|
13
|
+
|
14
|
+
* _[DONE]_ Check rule size
|
15
|
+
* _[DONE]_ Add 1 or more rules to a stream
|
16
|
+
* _[DONE]_ Delete some rules from the stream
|
17
|
+
* _[DONE]_ Get all existing rules for a stream
|
18
|
+
* _[DONE]_ Rules equality and usage in hash as keys
|
19
|
+
* Rule encoding (UTF-8 enforcement ?)
|
20
|
+
* Double check a rule supports all the syntactical and semantic restrictions
|
21
|
+
as defined by GNIP [PowerTrack Rules](http://support.gnip.com/apis/powertrack/rules.html#Restrictions)
|
22
|
+
reference documentation
|
23
|
+
* Support evolution of rules in terms of addition, removal and updates.
|
24
|
+
|
25
|
+
## Real-time PowerTrack
|
26
|
+
|
27
|
+
* _[DONE]_ Manage persistent connection to a data stream. See
|
28
|
+
[Powertrack API reference](http://support.gnip.com/apis/powertrack/api_reference.html)
|
29
|
+
* _[DONE]_ [Consume streaming data](http://support.gnip.com/apis/consuming_streaming_data.html)
|
30
|
+
* _[DONE]_ Capture heartbeat activities
|
31
|
+
* _[DONE]_ Capture system-related activities
|
32
|
+
|
33
|
+
## Compliance activities
|
34
|
+
|
35
|
+
See [Honoring user intent on Twitter](http://support.gnip.com/articles/honoring-user-intent-on-twitter.html)
|
36
|
+
and [Compliance Activities](http://support.gnip.com/sources/twitter/data_format.html#ComplianceActivities).
|
37
|
+
|
38
|
+
* _[DROPPED]_ Add a comply method to PowerTrack::API ?
|
39
|
+
The compliance activities are broadcast on a specific compliance stream.
|
40
|
+
* Support the Compliance Firehose stream
|
41
|
+
[Compliance Firehose Reference](http://support.gnip.com/apis/compliance_firehose/api_reference.html)
|
42
|
+
|
43
|
+
### Account
|
44
|
+
|
45
|
+
* Protect / Unprotect account
|
46
|
+
* Delete account
|
47
|
+
* Scrub geo
|
48
|
+
* Suspend account
|
49
|
+
* Withhold account
|
50
|
+
|
51
|
+
### Status
|
52
|
+
|
53
|
+
* Delete status
|
54
|
+
* Withhold status
|
55
|
+
|
56
|
+
## Data formats
|
57
|
+
|
58
|
+
See [Data format](http://support.gnip.com/sources/twitter/data_format.html)
|
59
|
+
|
60
|
+
* _[DONE]_ Support Original output format
|
61
|
+
* _[DONE]_ Support Activity Stream output format
|
62
|
+
* _[DONE]_ Support raw format
|
63
|
+
*
|
64
|
+
* _[OUT]_ Manage retweets.
|
65
|
+
See [Identifying and Understanding retweets](http://support.gnip.com/articles/identifying-and-understanding-retweets.html)
|
66
|
+
|
67
|
+
## Disconnections
|
68
|
+
|
69
|
+
See [Managing disconnections](http://support.gnip.com/articles/disconnections-explained.html)
|
70
|
+
|
71
|
+
* _[DONE]_ Reconnect after disconnect. See
|
72
|
+
[Disconnections & Reconnecting](http://support.gnip.com/apis/consuming_streaming_data.html#Disconnections)
|
73
|
+
* _[DONE]_ Reconnect using an exponential backoff pattern.
|
74
|
+
* _[DONE]_ Support Backfill
|
75
|
+
* Support Replay
|
76
|
+
* Reconnect when there's a GNIP server issue signaled by the 503 HTTP response status
|
77
|
+
|
78
|
+
## Other features
|
79
|
+
|
80
|
+
* _[DONE]_ Support test and development streams
|
81
|
+
* Support status dashboard
|
82
|
+
* Support Historical Powertrack
|
data/lib/powertrack.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
module PowerTrack
  # Base PowerTrack error. Keeps the status and the body it was created with
  # and exposes them through the +status+ and +body+ readers.
  class BasePowerTrackError < StandardError
    attr_reader :status, :body

    # Builds the exception message as "<status>: <msg>", leaving out whichever
    # part is blank once converted to a string and stripped.
    #
    # status - the status attached to the error (may be nil)
    # msg    - the error message; the body is used instead when msg is nil
    # body   - the body attached to the error (optional)
    def initialize(status, msg, body=nil)
      msg ||= body
      parts = [ status, msg ].map { |part| "#{part}".strip }
      super(parts.reject(&:empty?).join(': '))
      @status = status
      @body = body
    end
  end

  # Base class for PowerTrack errors without a precise status
  class NoStatusPowerTrackError < BasePowerTrackError
    # The status is always nil for this family of errors.
    def initialize(message, body)
      super(nil, message, body)
    end
  end

  # An error which is raised when there is a connection issue with the PowerTrack
  # endpoint
  class ConnectionError < NoStatusPowerTrackError
    # A connection error carries a message only: no status and no body.
    def initialize(message)
      super(message, nil)
    end
  end

  # Base class for PowerTrack errors with a precise status
  class WithStatusPowerTrackError < BasePowerTrackError
    # Factory method which returns an error instance based on a given status.
    #
    # status  - the status to look up
    # message - the error message
    # body    - the body attached to the error
    #
    # Returns an instance of the descendant class that pins the given status,
    # or an UnknownStatusError when no descendant matches it.
    #
    # Relies on Class#descendants, which is not defined in this file.
    def self.build(status, message, body)
      # The lookup table is always computed from the whole hierarchy, whatever
      # class the method is called on, because the cache is shared by all the
      # subclasses. Each descendant is instantiated once to discover the status
      # it pins; descendants that do not pin a status report nil and are left
      # out, so that a nil status can never match one of them.
      @@status_to_error_class ||=
        WithStatusPowerTrackError.descendants.each_with_object({}) do |desc, mapping|
          pinned_status = desc.new(nil, nil).status
          mapping[pinned_status] = desc unless pinned_status.nil?
        end

      error_class = @@status_to_error_class[status]
      if error_class
        error_class.new(message, body)
      else
        # default to unknown status error
        UnknownStatusError.new(status, message, body)
      end
    end
  end

  # An exception which is raised when the response received from PowerTrack is
  # invalid, poorly formatted in most cases.
  class InvalidResponseError < WithStatusPowerTrackError
  end

  # An exception which is raised when PowerTrack returns an unknown HTTP status code.
  class UnknownStatusError < WithStatusPowerTrackError
  end

  # Base class for errors which match a well-defined HTTP status code as
  # documented in the PowerTrack API reference.
  class PredefinedStatusPowerTrackError < WithStatusPowerTrackError
  end

  # Generally relates to poorly formatted JSON, and includes an "Invalid JSON"
  # message in the response.
  class BadRequestError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(400, message, body)
    end
  end

  # HTTP authentication failed due to invalid credentials.
  class UnauthorizedError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(401, message, body)
    end
  end

  # Generally, this occurs where your client fails to properly include the
  # headers to accept gzip encoding from the stream, but can occur in other
  # circumstances as well.
  #
  # Will contain a JSON message similar to "This connection requires
  # compression. To enable compression, send an 'Accept-Encoding: gzip' header
  # in your request and be ready to uncompress the stream as it is read on
  # the client end."
  class NotAcceptableError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(406, message, body)
    end
  end

  # Error pinned to the 422 status.
  class UnprocessableEntityError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(422, message, body)
    end
  end

  # Your app has exceeded the limit on connection requests.
  class RateLimitedError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(429, message, body)
    end
  end

  # Gnip server issue. If no notice about this issue has been posted on
  # status.gnip.com, email support@gnip.com.
  class ServiceUnavailableError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(503, message, body)
    end
  end
end
|