powertrack 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OGNhMTRiZDk2MDJlN2VjNGE3Zjk1MmVmMTVjNjJiODQxYThkNDdkNg==
5
+ data.tar.gz: !binary |-
6
+ MjVjZTk3NjZhOWMxNTFkMTE4NjQ0MmM5Y2JjOTY0ZDE1MzZkYTUwYg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODk2NGZlOWY5ZTM3N2MzYWZlYzQwNWUzNjhjNmYyYjBhMzUxODMxYzhiNjMz
10
+ YzNhOTE3NWQ5ZTE2N2M0YTZmOTNkY2M4NmRkMWJjYjM0Yjc2YzQ0OWIzNGI5
11
+ MGI4ZTg0YzdmMDUzZTYwYjc3MDAwN2FkZmY3ZWQyN2ZjYzhhODc=
12
+ data.tar.gz: !binary |-
13
+ ZjIzZGY3ZWZkNTA0MTJhZGQwZjNjN2FiNTFiZTQ3MmI2NTIzNWEyZGVkNzFj
14
+ YzczZWY5NTU0MDczNjQyZTA5YzMzMzE1ZmViODkyMDk2M2RjNGU4MTZhMGE2
15
+ M2VkODJjNzUwY2M3ZTBmOWUwMDM4NTZkYjE0NjFhZGI1YTFhMDI=
data/.gitignore ADDED
@@ -0,0 +1,41 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /vendor/bundle
26
+ /lib/bundler/man/
27
+
28
+ # for a library or gem, you might want to ignore these files since the code is
29
+ # intended to run in multiple environments; otherwise, check them in:
30
+ # Gemfile.lock
31
+ # .ruby-version
32
+ # .ruby-gemset
33
+
34
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
35
+ .rvmrc
36
+
37
+ # ignore Eclipse config
38
+ .project
39
+
40
+ # ignore tests-related config file
41
+ test/powertrack.yml
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ group :test do
4
+ gem 'yajl-ruby', '~> 1.0', require: 'yajl'
5
+ end
6
+
7
+ # Specify your gem's dependencies in powertrack-rb.gemspec
8
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,43 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ powertrack (1.0.0)
5
+ em-http-request (~> 1.1)
6
+ eventmachine (~> 1.0)
7
+ exponential-backoff (~> 0.0.2)
8
+ multi_json (~> 1.11)
9
+ void_logger (~> 0.1)
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ addressable (2.3.8)
15
+ cookiejar (0.3.2)
16
+ em-http-request (1.1.2)
17
+ addressable (>= 2.3.4)
18
+ cookiejar
19
+ em-socksify (>= 0.3)
20
+ eventmachine (>= 1.0.3)
21
+ http_parser.rb (>= 0.6.0)
22
+ em-socksify (0.3.0)
23
+ eventmachine (>= 1.0.0.beta.4)
24
+ eventmachine (1.0.7)
25
+ exponential-backoff (0.0.2)
26
+ http_parser.rb (0.6.0)
27
+ minitest (5.7.0)
28
+ multi_json (1.11.2)
29
+ rake (10.4.2)
30
+ ruby-prof (0.15.8)
31
+ void_logger (0.1)
32
+ yajl-ruby (1.2.1)
33
+
34
+ PLATFORMS
35
+ ruby
36
+
37
+ DEPENDENCIES
38
+ bundler (~> 1.7)
39
+ minitest (~> 5.5)
40
+ powertrack!
41
+ rake (~> 10.3)
42
+ ruby-prof (~> 0.15)
43
+ yajl-ruby (~> 1.0)
data/LICENSE.md ADDED
@@ -0,0 +1,25 @@
1
+ ## The MIT License (MIT)
2
+
3
+ - Copyright (c) 2015 eCairn Inc.
4
+ - Copyright (c) 2012-2015 Eric Wendelin
5
+ - Copyright (c) 2011-2015 Ryan Weald / Sharethrough
6
+
7
+ ```
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy
9
+ of this software and associated documentation files (the "Software"), to deal
10
+ in the Software without restriction, including without limitation the rights
11
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ copies of the Software, and to permit persons to whom the Software is
13
+ furnished to do so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in all
16
+ copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
25
+ ```
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # powertrack-rb
2
+ A Ruby gem for building GNIP PowerTrack streaming clients.
3
+
4
+ ## How to use it ?
5
+
6
+ 1. Create a PowerTrack stream based on your credentials
7
+
8
+ ```ruby
9
+ require 'powertrack'
10
+
11
+ stream = PowerTrack::Stream.new(
12
+ powertrack_config[:username],
13
+ powertrack_config[:password],
14
+ powertrack_config[:account_name],
15
+ powertrack_config[:data_source], # often 'twitter'
16
+ powertrack_config[:stream_label]) # often 'prod'
17
+ ```
18
+
19
+ 2. Add a few rules to the stream
20
+
21
+ ```ruby
22
+ rule = PowerTrack::Rule.new('coke')
23
+ if rule.valid?
24
+ stream.add_rule(rule)
25
+ # double-check that the rule was actually added
26
+ raise 'Fail to add a rule' unless stream.list_rules.include?(rule)
27
+ end
28
+ ```
29
+
30
+ 3. Get the activities out of the stream
31
+
32
+ ```ruby
33
+ received, heartbeats = 0, 0
34
+ activities = []
35
+
36
+ ## defining callbacks on messages received
37
+ # callback triggered for each message received
38
+ on_message = lambda { |message| received += 1 }
39
+ # callback triggered for each heartbeat received
40
+ on_heartbeat = lambda { heartbeats += 1 }
41
+ # callback triggered for each activity received
42
+ on_activity = lambda { |activity| activities << activity }
43
+
44
+ ## defining the block that will command the stop of the tracking
45
+ closed = false
46
+ close_now = lambda { closed }
47
+
48
+ delay = 60
49
+ Thread.new do
50
+ $stderr.puts "Time-bomb thread running for #{delay} seconds..."
51
+ sleep delay
52
+ $stderr.puts "Time to shut down !"
53
+ closed = true
54
+ end
55
+
56
+ started_at = Time.now
57
+ res = stream.track(on_message: on_message,
58
+ on_heartbeat: on_heartbeat,
59
+ on_activity: on_activity,
60
+ close_now: close_now)
61
+
62
+ puts "After #{delay} seconds tracking '#{rule.value}':"
63
+ puts " o #{received} messages received"
64
+ puts " o #{heartbeats} heartbeats received"
65
+ puts " o #{activities.size} activities captured"
66
+ ```
67
+
68
+ Please note that each message callback must be thread-safe since it can be called
69
+ multiple times simultaneously.
70
+
71
+ ## Tracking response format
72
+
73
+ By default, messages received are passed to callbacks as plain Ruby objects. Enable
74
+ the ```raw``` option to get the raw JSON-formatted string and do the parsing
75
+ yourself.
76
+
77
+ ## Stop tracking
78
+
79
+ The tracker calls the ```close_now``` block each second and stops whenever the call
80
+ returns true. The stop procedure includes an additional timeframe where the tracker
81
+ waits for each pending message to be completely processed.
82
+
83
+ It is the developer's responsibility to complete message processing as soon as
84
+ possible. After 10 seconds (by default), the stop will be forced and a few messages
85
+ already received but not processed yet may be lost.
86
+
87
+ The ```:stop_timeout``` option may be fine-tuned when passing options to the tracker.
88
+
89
+ ## Disconnections and Retries
90
+
91
+ As highly recommended by GNIP, the PowerTrack::Stream client manages an exponential
92
+ backoff retry mechanism when a disconnection happens. The reconnections can be
93
+ fine-tuned through the ```max_retries``` and ```backoff``` options passed to the
94
+ ```track``` call.
95
+
96
+ ## Backfill
97
+
98
+ Backfill is a feature provided by GNIP to avoid losing activities when being
99
+ disconnected. It automatically resends the messages sent on the stream for the
100
+ last 5 minutes when reconnecting.
101
+
102
+ Provide a (numerical) client id as the last (but optional) argument of the
103
+ PowerTrack::Stream constructor to enable this feature.
104
+
105
+ ## Errors
106
+
107
+ All the errors that come from PowerTrack are defined through an ad-hoc exception
108
+ class hierarchy. See ```lib/powertrack/errors.rb```.
109
+
110
+ ## Credits
111
+
112
+ The ```powertrack``` gem heavily relies on *EventMachine* and the *em-http-request*
113
+ companion gem. It also drew inspiration from a few other gems:
114
+
115
+ * The [gnip-rule](https://github.com/singlebrook/gnip-rule) gem
116
+ * The [gnip-stream](https://github.com/rweald/gnip-stream) gem
117
+ * The [exponential-backoff](https://github.com/pawelpacana/exponential-backoff) gem
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << 'test'
6
+ end
7
+
8
+ require 'rdoc/task'
9
+ namespace :doc do
10
+ RDoc::Task.new do |rd|
11
+ rd.rdoc_dir = 'doc'
12
+ rd.rdoc_files.include('lib/**/*.rb')
13
+ end
14
+ end
15
+
16
+ task :default => :test
data/TODO.md ADDED
@@ -0,0 +1,82 @@
1
+ ## General
2
+
3
+ * _[DONE]_ Rely upon MultiJson for JSON encoding and decoding
4
+ * Support thread-safe streams
5
+
6
+ A single stream used in several threads to perform several simultaneous actions
7
+ like consuming the stream while changing its rules.
8
+
9
+ It's currently impossible due to EventMachine. A transition to Celluloid::IO and
10
+ http.rb is required to be thread-friendly.
11
+
12
+ ## Rules
13
+
14
+ * _[DONE]_ Check rule size
15
+ * _[DONE]_ Add 1 or more rules to a stream
16
+ * _[DONE]_ Delete some rules from the stream
17
+ * _[DONE]_ Get all existing rules for a stream
18
+ * _[DONE]_ Rules equality and usage in hash as keys
19
+ * Rule encoding (UTF-8 enforcement ?)
20
+ * Double check a rule supports all the syntactical and semantic restrictions
21
+ as defined by GNIP [PowerTrack Rules](http://support.gnip.com/apis/powertrack/rules.html#Restrictions)
22
+ reference documentation
23
+ * Support evolution of rules in terms of addition, removal and updates.
24
+
25
+ ## Real-time PowerTrack
26
+
27
+ * _[DONE]_ Manage persistent connection to a data stream. See
28
+ [Powertrack API reference](http://support.gnip.com/apis/powertrack/api_reference.html)
29
+ * _[DONE]_ [Consume streaming data](http://support.gnip.com/apis/consuming_streaming_data.html)
30
+ * _[DONE]_ Capture heartbeat activities
31
+ * _[DONE]_ Capture system-related activities
32
+
33
+ ## Compliance activities
34
+
35
+ See [Honoring user intent on Twitter](http://support.gnip.com/articles/honoring-user-intent-on-twitter.html)
36
+ and [Compliance Activities](http://support.gnip.com/sources/twitter/data_format.html#ComplianceActivities).
37
+
38
+ * _[DROPPED]_ Add a comply method to PowerTrack::API ?
39
+ The compliance activities are broadcast on a specific compliance stream.
40
+ * Support the Compliance Firehose stream
41
+ [Compliance Firehose Reference](http://support.gnip.com/apis/compliance_firehose/api_reference.html)
42
+
43
+ ### Account
44
+
45
+ * Protect / Unprotect account
46
+ * Delete account
47
+ * Scrub geo
48
+ * Suspend account
49
+ * Withhold account
50
+
51
+ ### Status
52
+
53
+ * Delete status
54
+ * Withhold status
55
+
56
+ ## Data formats
57
+
58
+ See [Data format](http://support.gnip.com/sources/twitter/data_format.html)
59
+
60
+ * _[DONE]_ Support Original output format
61
+ * _[DONE]_ Support Activity Stream output format
62
+ * _[DONE]_ Support raw format
63
+ *
64
+ * _[OUT]_ Manage retweets.
65
+ See [Identifying and Understanding retweets](http://support.gnip.com/articles/identifying-and-understanding-retweets.html)
66
+
67
+ ## Disconnections
68
+
69
+ See [Managing disconnections](http://support.gnip.com/articles/disconnections-explained.html)
70
+
71
+ * _[DONE]_ Reconnect after disconnect. See
72
+ [Disconnections & Reconnecting](http://support.gnip.com/apis/consuming_streaming_data.html#Disconnections)
73
+ * _[DONE]_ Reconnect using an exponential backoff pattern.
74
+ * _[DONE]_ Support Backfill
75
+ * Support Replay
76
+ * Reconnect when there's a GNIP server issue signaled by the 503 HTTP response status
77
+
78
+ ## Other features
79
+
80
+ * _[DONE]_ Support test and development streams
81
+ * Support status dashboard
82
+ * Support Historical Powertrack
@@ -0,0 +1,7 @@
1
# Core extension adding descendant lookup to every class.
# See http://stackoverflow.com/questions/2393697/look-up-all-descendants-of-a-class-in-ruby
class Class
  # Returns an array of every currently loaded class that inherits, directly
  # or indirectly, from the receiver. The receiver itself is not included.
  def descendants
    found = []
    # Walk every live Class object and keep the strict subclasses of self
    # (Module#< is nil or false for unrelated classes and for self).
    ObjectSpace.each_object(::Class) do |candidate|
      found << candidate if candidate < self
    end
    found
  end
end
data/lib/powertrack.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'core_ext/class.rb'
2
+
3
+ require 'powertrack/version'
4
+ require 'powertrack/errors'
5
+
6
+ require 'powertrack/rules/rule'
7
+ require 'powertrack/rules/string_extension'
8
+
9
+ require 'powertrack/streaming/api'
10
+ require 'powertrack/streaming/stream'
@@ -0,0 +1,110 @@
1
module PowerTrack
  # Base PowerTrack error, capable of wrapping another.
  #
  # The exception message is built as "<status>: <msg>", skipping any part
  # that is blank. When no message is given, the body is used instead.
  class BasePowerTrackError < StandardError
    # status - the status attached to the error (nil when there is none)
    # body   - the raw body attached to the error (may be nil)
    attr_reader :status, :body

    # status - status to report, or nil
    # msg    - human-readable message; falls back to body when nil
    # body   - optional raw body kept for later inspection
    def initialize(status, msg, body=nil)
      msg ||= body
      parts = [ status, msg ].map { |part| part.to_s.strip }.reject(&:empty?)
      super(parts.join(': '))
      @status = status
      @body = body
    end
  end

  # Base class for PowerTrack errors without a precise status
  class NoStatusPowerTrackError < BasePowerTrackError
    def initialize(message, body)
      super(nil, message, body)
    end
  end

  # An error which is raised when there is a connection issue with the PowerTrack
  # endpoint
  class ConnectionError < NoStatusPowerTrackError
    def initialize(message)
      super(message, nil)
    end
  end

  # Base class for PowerTrack errors with a precise status
  class WithStatusPowerTrackError < BasePowerTrackError
    # Factory method which returns an error instance based on a given status.
    #
    # status  - the status to look up
    # message - the error message (may be nil, in which case body is used)
    # body    - the raw body (may be nil)
    #
    # Returns an instance of the predefined error class registered for the
    # status, or an UnknownStatusError carrying the status when none matches
    # (including when status is nil).
    def self.build(status, message, body)
      # Always go through the base class so the lookup table is the same
      # whichever subclass the factory is invoked on.
      error_class = WithStatusPowerTrackError.status_to_error_class[status]
      return error_class.new(message, body) if error_class

      # default to unknown status error
      UnknownStatusError.new(status, message, body)
    end

    # Internal: lazily built Hash mapping a status to its predefined error
    # class.
    #
    # Only descendants of PredefinedStatusPowerTrackError that report a
    # non-nil status are registered, since those are the classes whose
    # constructor takes (message, body) as used by build.
    def self.status_to_error_class
      @status_to_error_class ||=
        PredefinedStatusPowerTrackError.descendants.each_with_object({}) do |klass, mapping|
          # Each predefined class hard-codes its status in its constructor;
          # a throw-away instance is the way to read it back.
          status = klass.new(nil, nil).status
          mapping[status] = klass unless status.nil?
        end
    end
  end

  # An exception which is raised when the response received from PowerTrack is
  # invalid, poorly formatted in most cases.
  class InvalidResponseError < WithStatusPowerTrackError
  end

  # An exception which is raised when PowerTrack returns an unknown HTTP status code.
  class UnknownStatusError < WithStatusPowerTrackError
  end

  # Base class for errors which match a well-defined HTTP status code as
  # documented in the PowerTrack API reference.
  class PredefinedStatusPowerTrackError < WithStatusPowerTrackError
  end

  # Generally relates to poorly formatted JSON, and includes an "Invalid JSON"
  # message in the response.
  class BadRequestError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(400, message, body)
    end
  end

  # HTTP authentication failed due to invalid credentials.
  class UnauthorizedError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(401, message, body)
    end
  end

  # Generally, this occurs where your client fails to properly include the
  # headers to accept gzip encoding from the stream, but can occur in other
  # circumstances as well.
  #
  # Will contain a JSON message similar to "This connection requires
  # compression. To enable compression, send an 'Accept-Encoding: gzip' header
  # in your request and be ready to uncompress the stream as it is read on
  # the client end."
  class NotAcceptableError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(406, message, body)
    end
  end

  # Error reported with the 422 status.
  class UnprocessableEntityError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(422, message, body)
    end
  end

  # Your app has exceeded the limit on connection requests.
  class RateLimitedError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(429, message, body)
    end
  end

  # Gnip server issue. If no notice about this issue has been posted on
  # status.gnip.com, email support@gnip.com.
  class ServiceUnavailableError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(503, message, body)
    end
  end
end