powertrack 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +41 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +43 -0
- data/LICENSE.md +25 -0
- data/README.md +117 -0
- data/Rakefile +16 -0
- data/TODO.md +82 -0
- data/lib/core_ext/class.rb +7 -0
- data/lib/powertrack.rb +10 -0
- data/lib/powertrack/errors.rb +110 -0
- data/lib/powertrack/rules/rule.rb +140 -0
- data/lib/powertrack/rules/string_extension.rb +9 -0
- data/lib/powertrack/streaming/api.rb +64 -0
- data/lib/powertrack/streaming/data_buffer.rb +36 -0
- data/lib/powertrack/streaming/retrier.rb +70 -0
- data/lib/powertrack/streaming/stream.rb +429 -0
- data/lib/powertrack/version.rb +3 -0
- data/powertrack.gemspec +32 -0
- data/test/minitest_helper.rb +41 -0
- data/test/test_manage_rules.rb +30 -0
- data/test/test_rule.rb +163 -0
- data/test/test_track_stream.rb +72 -0
- metadata +202 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OGNhMTRiZDk2MDJlN2VjNGE3Zjk1MmVmMTVjNjJiODQxYThkNDdkNg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MjVjZTk3NjZhOWMxNTFkMTE4NjQ0MmM5Y2JjOTY0ZDE1MzZkYTUwYg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ODk2NGZlOWY5ZTM3N2MzYWZlYzQwNWUzNjhjNmYyYjBhMzUxODMxYzhiNjMz
|
10
|
+
YzNhOTE3NWQ5ZTE2N2M0YTZmOTNkY2M4NmRkMWJjYjM0Yjc2YzQ0OWIzNGI5
|
11
|
+
MGI4ZTg0YzdmMDUzZTYwYjc3MDAwN2FkZmY3ZWQyN2ZjYzhhODc=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZjIzZGY3ZWZkNTA0MTJhZGQwZjNjN2FiNTFiZTQ3MmI2NTIzNWEyZGVkNzFj
|
14
|
+
YzczZWY5NTU0MDczNjQyZTA5YzMzMzE1ZmViODkyMDk2M2RjNGU4MTZhMGE2
|
15
|
+
M2VkODJjNzUwY2M3ZTBmOWUwMDM4NTZkYjE0NjFhZGI1YTFhMDI=
|
data/.gitignore
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/test/tmp/
|
9
|
+
/test/version_tmp/
|
10
|
+
/tmp/
|
11
|
+
|
12
|
+
## Specific to RubyMotion:
|
13
|
+
.dat*
|
14
|
+
.repl_history
|
15
|
+
build/
|
16
|
+
|
17
|
+
## Documentation cache and generated files:
|
18
|
+
/.yardoc/
|
19
|
+
/_yardoc/
|
20
|
+
/doc/
|
21
|
+
/rdoc/
|
22
|
+
|
23
|
+
## Environment normalisation:
|
24
|
+
/.bundle/
|
25
|
+
/vendor/bundle
|
26
|
+
/lib/bundler/man/
|
27
|
+
|
28
|
+
# for a library or gem, you might want to ignore these files since the code is
|
29
|
+
# intended to run in multiple environments; otherwise, check them in:
|
30
|
+
# Gemfile.lock
|
31
|
+
# .ruby-version
|
32
|
+
# .ruby-gemset
|
33
|
+
|
34
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
35
|
+
.rvmrc
|
36
|
+
|
37
|
+
# ignore Eclipse config
|
38
|
+
.project
|
39
|
+
|
40
|
+
# ignore tests-related config file
|
41
|
+
test/powertrack.yml
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
powertrack (1.0.0)
|
5
|
+
em-http-request (~> 1.1)
|
6
|
+
eventmachine (~> 1.0)
|
7
|
+
exponential-backoff (~> 0.0.2)
|
8
|
+
multi_json (~> 1.11)
|
9
|
+
void_logger (~> 0.1)
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
addressable (2.3.8)
|
15
|
+
cookiejar (0.3.2)
|
16
|
+
em-http-request (1.1.2)
|
17
|
+
addressable (>= 2.3.4)
|
18
|
+
cookiejar
|
19
|
+
em-socksify (>= 0.3)
|
20
|
+
eventmachine (>= 1.0.3)
|
21
|
+
http_parser.rb (>= 0.6.0)
|
22
|
+
em-socksify (0.3.0)
|
23
|
+
eventmachine (>= 1.0.0.beta.4)
|
24
|
+
eventmachine (1.0.7)
|
25
|
+
exponential-backoff (0.0.2)
|
26
|
+
http_parser.rb (0.6.0)
|
27
|
+
minitest (5.7.0)
|
28
|
+
multi_json (1.11.2)
|
29
|
+
rake (10.4.2)
|
30
|
+
ruby-prof (0.15.8)
|
31
|
+
void_logger (0.1)
|
32
|
+
yajl-ruby (1.2.1)
|
33
|
+
|
34
|
+
PLATFORMS
|
35
|
+
ruby
|
36
|
+
|
37
|
+
DEPENDENCIES
|
38
|
+
bundler (~> 1.7)
|
39
|
+
minitest (~> 5.5)
|
40
|
+
powertrack!
|
41
|
+
rake (~> 10.3)
|
42
|
+
ruby-prof (~> 0.15)
|
43
|
+
yajl-ruby (~> 1.0)
|
data/LICENSE.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
## The MIT License (MIT)
|
2
|
+
|
3
|
+
- Copyright (c) 2015 eCairn Inc.
|
4
|
+
- Copyright (c) 2012-2015 Eric Wendelin
|
5
|
+
- Copyright (c) 2011-2015 Ryan Weald / Sharethrough
|
6
|
+
|
7
|
+
```
|
8
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
9
|
+
of this software and associated documentation files (the "Software"), to deal
|
10
|
+
in the Software without restriction, including without limitation the rights
|
11
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
12
|
+
copies of the Software, and to permit persons to whom the Software is
|
13
|
+
furnished to do so, subject to the following conditions:
|
14
|
+
|
15
|
+
The above copyright notice and this permission notice shall be included in all
|
16
|
+
copies or substantial portions of the Software.
|
17
|
+
|
18
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
19
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
20
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
21
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
22
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
23
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
24
|
+
SOFTWARE.
|
25
|
+
```
|
data/README.md
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# powertrack-rb
|
2
|
+
A Ruby gem for building GNIP PowerTrack streaming clients.
|
3
|
+
|
4
|
+
## How to use it ?
|
5
|
+
|
6
|
+
1. Create a PowerTrack stream based on your credentials
|
7
|
+
|
8
|
+
```ruby
|
9
|
+
require 'powertrack'
|
10
|
+
|
11
|
+
stream = PowerTrack::Stream.new(
|
12
|
+
powertrack_config[:username],
|
13
|
+
powertrack_config[:password],
|
14
|
+
powertrack_config[:account_name],
|
15
|
+
powertrack_config[:data_source], # often 'twitter'
|
16
|
+
powertrack_config[:stream_label]) # often 'prod'
|
17
|
+
```
|
18
|
+
|
19
|
+
2. Add a few rules to the stream
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
rule = PowerTrack::Rule.new('coke')
|
23
|
+
if rule.valid?
|
24
|
+
stream.add_rule(rule)
|
25
|
+
# double-check that the rule was actually added
|
26
|
+
raise 'Fail to add a rule' unless stream.list_rules.include?(rule)
|
27
|
+
end
|
28
|
+
```
|
29
|
+
|
30
|
+
3. Get the activities out of the stream
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
received, heartbeats = 0, 0
|
34
|
+
activities = []
|
35
|
+
|
36
|
+
## defining callbacks on messages received
|
37
|
+
# callback triggered for each message received
|
38
|
+
on_message = lambda { |message| received += 1 }
|
39
|
+
# callback triggered for each heartbeat received
|
40
|
+
on_heartbeat = lambda { heartbeats += 1 }
|
41
|
+
# callback triggered for each activity received
|
42
|
+
on_activity = lambda { |activity| activities << activity }
|
43
|
+
|
44
|
+
## defining the block that will command the stop of the tracking
|
45
|
+
closed = false
|
46
|
+
close_now = lambda { closed }
|
47
|
+
|
48
|
+
delay = 60
|
49
|
+
Thread.new do
|
50
|
+
$stderr.puts "Time-bomb thread running for #{delay} seconds..."
|
51
|
+
sleep delay
|
52
|
+
$stderr.puts "Time to shut down !"
|
53
|
+
closed = true
|
54
|
+
end
|
55
|
+
|
56
|
+
started_at = Time.now
|
57
|
+
res = stream.track(on_message: on_message,
|
58
|
+
on_heartbeat: on_heartbeat,
|
59
|
+
on_activity: on_activity,
|
60
|
+
close_now: close_now)
|
61
|
+
|
62
|
+
puts "After #{delay} seconds tracking '#{rule.value}':"
|
63
|
+
puts " o #{received} messages received"
|
64
|
+
puts " o #{heartbeats} heartbeats received"
|
65
|
+
puts " o #{activities.size} activities captured"
|
66
|
+
```
|
67
|
+
|
68
|
+
Please note that each message callback must be thread-safe since it can be called
|
69
|
+
multiple times simultaneously.
|
70
|
+
|
71
|
+
## Tracking response format
|
72
|
+
|
73
|
+
By default, messages received are passed to callbacks as plain Ruby objects. Enable
|
74
|
+
the ```raw``` option to get raw JSON-formatted strings and do the parsing
|
75
|
+
yourself.
|
76
|
+
|
77
|
+
## Stop tracking
|
78
|
+
|
79
|
+
The tracker calls the ```close_now``` block each second and stops whenever the call
|
80
|
+
returns true. The stop procedure includes an additional timeframe where the tracker
|
81
|
+
waits for each pending message to be completely processed.
|
82
|
+
|
83
|
+
It's the developer's responsibility to complete message processing as soon as
|
84
|
+
possible. After 10 seconds (by default), the stop will be forced and a few messages
|
85
|
+
already received but not processed yet may be lost.
|
86
|
+
|
87
|
+
The ```:stop_timeout``` may be fine-tuned when passing options to the tracker.
|
88
|
+
|
89
|
+
## Disconnections and Retries
|
90
|
+
|
91
|
+
As highly recommended by GNIP, the PowerTrack::Stream client manages an exponential
|
92
|
+
backoff retry mechanism when a disconnection happens. The reconnections can be
|
93
|
+
fine-tuned through the ```max_retries``` and ```backoff``` options passed to the
|
94
|
+
```track``` call.
|
95
|
+
|
96
|
+
## Backfill
|
97
|
+
|
98
|
+
Backfill is a feature provided by GNIP to avoid losing activities when being
|
99
|
+
disconnected. It automatically resends the messages sent on the stream for the
|
100
|
+
last 5 minutes when reconnecting.
|
101
|
+
|
102
|
+
Provide a (numerical) client id as the last (but optional) argument of the
|
103
|
+
PowerTrack::Stream constructor to enable this feature.
|
104
|
+
|
105
|
+
## Errors
|
106
|
+
|
107
|
+
All the errors that come from PowerTrack are defined through an ad-hoc exception
|
108
|
+
class hierarchy. See ```lib/powertrack/errors.rb```.
|
109
|
+
|
110
|
+
## Credits
|
111
|
+
|
112
|
+
The ```powertrack``` gem heavily relies on *EventMachine* and the *em-http-request*
|
113
|
+
companion gem. It also got inspiration from a few other gems:
|
114
|
+
|
115
|
+
* The [gnip-rule](https://github.com/singlebrook/gnip-rule) gem
|
116
|
+
* The [gnip-stream](https://github.com/rweald/gnip-stream) gem
|
117
|
+
* The [exponential-backoff](https://github.com/pawelpacana/exponential-backoff) gem
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/testtask'
|
3
|
+
|
4
|
+
Rake::TestTask.new(:test) do |t|
|
5
|
+
t.libs << 'test'
|
6
|
+
end
|
7
|
+
|
8
|
+
require 'rdoc/task'
|
9
|
+
namespace :doc do
|
10
|
+
RDoc::Task.new do |rd|
|
11
|
+
rd.rdoc_dir = 'doc'
|
12
|
+
rd.rdoc_files.include('lib/**/*.rb')
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
task :default => :test
|
data/TODO.md
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
## General
|
2
|
+
|
3
|
+
* _[DONE]_ Rely upon MultiJson for JSON encoding and decoding
|
4
|
+
* Support thread-safe streams
|
5
|
+
|
6
|
+
A single stream used in several threads to perform several simultaneous actions
|
7
|
+
like consuming the stream while changing its rules.
|
8
|
+
|
9
|
+
It's currently impossible due to EventMachine. A transition to Celluloid::IO and
|
10
|
+
http.rb is required to be thread-friendly.
|
11
|
+
|
12
|
+
## Rules
|
13
|
+
|
14
|
+
* _[DONE]_ Check rule size
|
15
|
+
* _[DONE]_ Add 1 or more rules to a stream
|
16
|
+
* _[DONE]_ Delete some rules from the stream
|
17
|
+
* _[DONE]_ Get all existing rules for a stream
|
18
|
+
* _[DONE]_ Rules equality and usage in hash as keys
|
19
|
+
* Rule encoding (UTF-8 enforcement ?)
|
20
|
+
* Double check a rule supports all the syntactical and semantic restrictions
|
21
|
+
as defined by GNIP [PowerTrack Rules](http://support.gnip.com/apis/powertrack/rules.html#Restrictions)
|
22
|
+
reference documentation
|
23
|
+
* Support evolution of rules in terms of addition, removal and updates.
|
24
|
+
|
25
|
+
## Real-time PowerTrack
|
26
|
+
|
27
|
+
* _[DONE]_ Manage persistent connection to a data stream. See
|
28
|
+
[Powertrack API reference](http://support.gnip.com/apis/powertrack/api_reference.html)
|
29
|
+
* _[DONE]_ [Consume streaming data](http://support.gnip.com/apis/consuming_streaming_data.html)
|
30
|
+
* _[DONE]_ Capture heartbeat activities
|
31
|
+
* _[DONE]_ Capture system-related activities
|
32
|
+
|
33
|
+
## Compliance activities
|
34
|
+
|
35
|
+
See [Honoring user intent on Twitter](http://support.gnip.com/articles/honoring-user-intent-on-twitter.html)
|
36
|
+
and [Compliance Activities](http://support.gnip.com/sources/twitter/data_format.html#ComplianceActivities).
|
37
|
+
|
38
|
+
* _[DROPPED]_ Add a comply method to PowerTrack::API ?
|
39
|
+
The compliance activities are broadcasted on a specific compliance stream.
|
40
|
+
* Support the Compliance Firehose stream
|
41
|
+
[Compliance Firehose Reference](http://support.gnip.com/apis/compliance_firehose/api_reference.html)
|
42
|
+
|
43
|
+
### Account
|
44
|
+
|
45
|
+
* Protect / Unprotect account
|
46
|
+
* Delete account
|
47
|
+
* Scrub geo
|
48
|
+
* Suspend account
|
49
|
+
* Withhold account
|
50
|
+
|
51
|
+
### Status
|
52
|
+
|
53
|
+
* Delete status
|
54
|
+
* Withhold status
|
55
|
+
|
56
|
+
## Data formats
|
57
|
+
|
58
|
+
See [Data format](http://support.gnip.com/sources/twitter/data_format.html)
|
59
|
+
|
60
|
+
* _[DONE]_ Support Original output format
|
61
|
+
* _[DONE]_ Support Activity Stream output format
|
62
|
+
* _[DONE]_ Support raw format
|
63
|
+
*
|
64
|
+
* _[OUT]_ Manage retweets.
|
65
|
+
See [Identifying and Understanding retweets](http://support.gnip.com/articles/identifying-and-understanding-retweets.html)
|
66
|
+
|
67
|
+
## Disconnections
|
68
|
+
|
69
|
+
See [Managing disconnections](http://support.gnip.com/articles/disconnections-explained.html)
|
70
|
+
|
71
|
+
* _[DONE]_ Reconnect after disconnect. See
|
72
|
+
[Disconnections & Reconnecting](http://support.gnip.com/apis/consuming_streaming_data.html#Disconnections)
|
73
|
+
* _[DONE]_ Reconnect using an exponential backoff pattern.
|
74
|
+
* _[DONE]_ Support Backfill
|
75
|
+
* Support Replay
|
76
|
+
* Reconnect when there's a GNIP server issue signaled by the 503 HTTP response status
|
77
|
+
|
78
|
+
## Other features
|
79
|
+
|
80
|
+
* _[DONE]_ Support test and development streams
|
81
|
+
* Support status dashboard
|
82
|
+
* Support Historical Powertrack
|
data/lib/powertrack.rb
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
module PowerTrack
  # Base PowerTrack error, capable of wrapping another.
  #
  # The exception message is made of the status and the message joined by
  # ': ', leaving out whichever part is blank. When no message is given, the
  # body is used as the message instead.
  class BasePowerTrackError < StandardError
    attr_reader :status, :body

    # status - the status attached to the error (may be nil)
    # msg    - the error message; defaults to the body when nil
    # body   - the body attached to the error (optional)
    def initialize(status, msg, body=nil)
      msg ||= body
      parts = [ status, msg ].map { |part| part.to_s.strip }
      super(parts.reject(&:empty?).join(': '))
      @status = status
      @body = body
    end
  end

  # Base class for PowerTrack errors without a precise status
  class NoStatusPowerTrackError < BasePowerTrackError
    # Builds the error with a nil status.
    def initialize(message, body)
      super(nil, message, body)
    end
  end

  # An error which is raised when there is a connection issue with the PowerTrack
  # endpoint
  class ConnectionError < NoStatusPowerTrackError
    # Builds the error with a message only; status and body are nil.
    def initialize(message)
      super(message, nil)
    end
  end

  # Base class for PowerTrack errors with a precise status
  class WithStatusPowerTrackError < BasePowerTrackError
    # Factory method which returns an error instance based on a given status.
    #
    # status  - the status to look up among the known error classes
    # message - the error message
    # body    - the body attached to the error
    #
    # Returns an instance of the descendant class bound to the status, or an
    # UnknownStatusError when no class is bound to it (including a nil status).
    #
    # Relies on Class#descendants, which is not defined in this file.
    def self.build(status, message, body)
      # The table is built once, always from the root of the hierarchy, so that
      # the result does not depend on which subclass received the first call.
      # Each descendant is probed with a throw-away instance to read the status
      # it is bound to. Descendants with no fixed status report nil and must be
      # skipped: they use the (status, msg, body) constructor and would receive
      # the message as their status if dispatched to with (message, body).
      @@status_to_error_class ||=
        WithStatusPowerTrackError.descendants.each_with_object({}) do |desc, table|
          probed_status = desc.new(nil, nil).status
          table[probed_status] = desc unless probed_status.nil?
        end

      error_class = @@status_to_error_class[status]
      if error_class
        error_class.new(message, body)
      else
        # default to unknown status error
        UnknownStatusError.new(status, message, body)
      end
    end
  end

  # An exception which is raised when the response received from PowerTrack is
  # invalid, poorly formatted in most cases.
  class InvalidResponseError < WithStatusPowerTrackError
  end

  # An exception which is raised when PowerTrack returns an unknown HTTP status code.
  class UnknownStatusError < WithStatusPowerTrackError
  end

  # Base class for errors which match a well-defined HTTP status code as
  # documented in the PowerTrack API reference.
  class PredefinedStatusPowerTrackError < WithStatusPowerTrackError
  end

  # Generally relates to poorly formatted JSON, and includes an "Invalid JSON"
  # message in the response.
  class BadRequestError < PredefinedStatusPowerTrackError
    # Builds the error with the 400 status.
    def initialize(message, body)
      super(400, message, body)
    end
  end

  # HTTP authentication failed due to invalid credentials.
  class UnauthorizedError < PredefinedStatusPowerTrackError
    # Builds the error with the 401 status.
    def initialize(message, body)
      super(401, message, body)
    end
  end

  # Generally, this occurs where your client fails to properly include the
  # headers to accept gzip encoding from the stream, but can occur in other
  # circumstances as well.
  #
  # Will contain a JSON message similar to "This connection requires
  # compression. To enable compression, send an 'Accept-Encoding: gzip' header
  # in your request and be ready to uncompress the stream as it is read on
  # the client end."
  class NotAcceptableError < PredefinedStatusPowerTrackError
    # Builds the error with the 406 status.
    def initialize(message, body)
      super(406, message, body)
    end
  end

  # Error bound to the 422 status.
  class UnprocessableEntityError < PredefinedStatusPowerTrackError
    # Builds the error with the 422 status.
    def initialize(message, body)
      super(422, message, body)
    end
  end

  # Your app has exceeded the limit on connection requests.
  class RateLimitedError < PredefinedStatusPowerTrackError
    # Builds the error with the 429 status.
    def initialize(message, body)
      super(429, message, body)
    end
  end

  # Gnip server issue. If no notice about this issue has been posted on
  # status.gnip.com, email support@gnip.com.
  class ServiceUnavailableError < PredefinedStatusPowerTrackError
    # Builds the error with the 503 status.
    def initialize(message, body)
      super(503, message, body)
    end
  end
end
|