powertrack 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ OGNhMTRiZDk2MDJlN2VjNGE3Zjk1MmVmMTVjNjJiODQxYThkNDdkNg==
5
+ data.tar.gz: !binary |-
6
+ MjVjZTk3NjZhOWMxNTFkMTE4NjQ0MmM5Y2JjOTY0ZDE1MzZkYTUwYg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODk2NGZlOWY5ZTM3N2MzYWZlYzQwNWUzNjhjNmYyYjBhMzUxODMxYzhiNjMz
10
+ YzNhOTE3NWQ5ZTE2N2M0YTZmOTNkY2M4NmRkMWJjYjM0Yjc2YzQ0OWIzNGI5
11
+ MGI4ZTg0YzdmMDUzZTYwYjc3MDAwN2FkZmY3ZWQyN2ZjYzhhODc=
12
+ data.tar.gz: !binary |-
13
+ ZjIzZGY3ZWZkNTA0MTJhZGQwZjNjN2FiNTFiZTQ3MmI2NTIzNWEyZGVkNzFj
14
+ YzczZWY5NTU0MDczNjQyZTA5YzMzMzE1ZmViODkyMDk2M2RjNGU4MTZhMGE2
15
+ M2VkODJjNzUwY2M3ZTBmOWUwMDM4NTZkYjE0NjFhZGI1YTFhMDI=
data/.gitignore ADDED
@@ -0,0 +1,41 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /vendor/bundle
26
+ /lib/bundler/man/
27
+
28
+ # for a library or gem, you might want to ignore these files since the code is
29
+ # intended to run in multiple environments; otherwise, check them in:
30
+ # Gemfile.lock
31
+ # .ruby-version
32
+ # .ruby-gemset
33
+
34
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
35
+ .rvmrc
36
+
37
+ # ignore Eclipse config
38
+ .project
39
+
40
+ # ignore tests-related config file
41
+ test/powertrack.yml
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ group :test do
4
+ gem 'yajl-ruby', '~> 1.0', require: 'yajl'
5
+ end
6
+
7
+ # Specify your gem's dependencies in powertrack-rb.gemspec
8
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,43 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ powertrack (1.0.0)
5
+ em-http-request (~> 1.1)
6
+ eventmachine (~> 1.0)
7
+ exponential-backoff (~> 0.0.2)
8
+ multi_json (~> 1.11)
9
+ void_logger (~> 0.1)
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ addressable (2.3.8)
15
+ cookiejar (0.3.2)
16
+ em-http-request (1.1.2)
17
+ addressable (>= 2.3.4)
18
+ cookiejar
19
+ em-socksify (>= 0.3)
20
+ eventmachine (>= 1.0.3)
21
+ http_parser.rb (>= 0.6.0)
22
+ em-socksify (0.3.0)
23
+ eventmachine (>= 1.0.0.beta.4)
24
+ eventmachine (1.0.7)
25
+ exponential-backoff (0.0.2)
26
+ http_parser.rb (0.6.0)
27
+ minitest (5.7.0)
28
+ multi_json (1.11.2)
29
+ rake (10.4.2)
30
+ ruby-prof (0.15.8)
31
+ void_logger (0.1)
32
+ yajl-ruby (1.2.1)
33
+
34
+ PLATFORMS
35
+ ruby
36
+
37
+ DEPENDENCIES
38
+ bundler (~> 1.7)
39
+ minitest (~> 5.5)
40
+ powertrack!
41
+ rake (~> 10.3)
42
+ ruby-prof (~> 0.15)
43
+ yajl-ruby (~> 1.0)
data/LICENSE.md ADDED
@@ -0,0 +1,25 @@
1
+ ## The MIT License (MIT)
2
+
3
+ - Copyright (c) 2015 eCairn Inc.
4
+ - Copyright (c) 2012-2015 Eric Wendelin
5
+ - Copyright (c) 2011-2015 Ryan Weald / Sharethrough
6
+
7
+ ```
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy
9
+ of this software and associated documentation files (the "Software"), to deal
10
+ in the Software without restriction, including without limitation the rights
11
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ copies of the Software, and to permit persons to whom the Software is
13
+ furnished to do so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in all
16
+ copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
25
+ ```
data/README.md ADDED
@@ -0,0 +1,117 @@
1
+ # powertrack-rb
2
+ A Ruby gem for building GNIP PowerTrack streaming clients.
3
+
4
+ ## How to use it ?
5
+
6
+ 1. Create a PowerTrack stream based on your credentials
7
+
8
+ ```ruby
9
+ require 'powertrack'
10
+
11
+ stream = PowerTrack::Stream.new(
12
+ powertrack_config[:username],
13
+ powertrack_config[:password],
14
+ powertrack_config[:account_name],
15
+ powertrack_config[:data_source], # often 'twitter'
16
+ powertrack_config[:stream_label]) # often 'prod'
17
+ ```
18
+
19
+ 2. Add a few rules to the stream
20
+
21
+ ```ruby
22
+ rule = PowerTrack::Rule.new('coke')
23
+ if rule.valid?
24
+ stream.add_rule(rule)
25
+ # double-check that the rule was actually added
26
+ raise 'Fail to add a rule' unless stream.list_rules.include?(rule)
27
+ end
28
+ ```
29
+
30
+ 3. Get the activities out of the stream
31
+
32
+ ```ruby
33
+ received, heartbeats = 0, 0
34
+ activities = []
35
+
36
+ ## defining callbacks on messages received
37
+ # callback triggered for each message received
38
+ on_message = lambda { |message| received += 1 }
39
+ # callback triggered for each heartbeat received
40
+ on_heartbeat = lambda { heartbeats += 1 }
41
+ # callback triggered for each activity received
42
+ on_activity = lambda { |activity| activities << activity }
43
+
44
+ ## defining the block that will command the stop of the tracking
45
+ closed = false
46
+ close_now = lambda { closed }
47
+
48
+ delay = 60
49
+ Thread.new do
50
+ $stderr.puts "Time-bomb thread running for #{delay} seconds..."
51
+ sleep delay
52
+ $stderr.puts "Time to shut down !"
53
+ closed = true
54
+ end
55
+
56
+ started_at = Time.now
57
+ res = stream.track(on_message: on_message,
58
+ on_heartbeat: on_heartbeat,
59
+ on_activity: on_activity,
60
+ close_now: close_now)
61
+
62
+ puts "After #{delay} seconds tracking '#{rule.value}':"
63
+ puts " o #{received} messages received"
64
+ puts " o #{heartbeats} heartbeats received"
65
+ puts " o #{activities.size} activities captured"
66
+ ```
67
+
68
+ Please note that each message callback must be thread-safe since it can be called
69
+ multiple times simultaneously.
70
+
71
+ ## Tracking response format
72
+
73
+ By default, messages received are passed to callbacks as plain Ruby objects. Enable
74
+ the ```raw``` option to get the raw JSON-formatted string and do the parsing
75
+ yourself.
76
+
77
+ ## Stop tracking
78
+
79
+ The tracker calls the ```close_now``` block each second and stops whenever the call
80
+ returns true. The stop procedure includes an additional timeframe where the tracker
81
+ waits for each pending message to be completely processed.
82
+
83
+ It is the developer's responsibility to complete message processing as soon as
84
+ possible. After 10 seconds (by default), the stop will be forced and a few messages
85
+ already received but not processed yet may be lost.
86
+
87
+ The ```:stop_timeout``` option may be fine-tuned when passing options to the tracker.
88
+
89
+ ## Disconnections and Retries
90
+
91
+ As highly recommended by GNIP, the PowerTrack::Stream client manages an exponential
92
+ backoff retry mechanism when a disconnection happens. The reconnections can be
93
+ fine-tuned through the ```max_retries``` and ```backoff``` options passed to the
94
+ ```track``` call.
95
+
96
+ ## Backfill
97
+
98
+ Backfill is a feature provided by GNIP to avoid losing activities when being
99
+ disconnected. It automatically resends the messages sent on the stream for the
100
+ last 5 minutes when reconnecting.
101
+
102
+ Provide a (numerical) client id as the last (but optional) argument of the
103
+ PowerTrack::Stream constructor to enable this feature.
104
+
105
+ ## Errors
106
+
107
+ All the errors that come from PowerTrack are defined through an ad-hoc exception
108
+ class hierarchy. See ```lib/powertrack/errors.rb```.
109
+
110
+ ## Credits
111
+
112
+ The ```powertrack``` gem heavily relies on *EventMachine* and the *em-http-request*
113
+ companion gem. It also drew inspiration from a few other gems:
114
+
115
+ * The [gnip-rule](https://github.com/singlebrook/gnip-rule) gem
116
+ * The [gnip-stream](https://github.com/rweald/gnip-stream) gem
117
+ * The [exponential-backoff](https://github.com/pawelpacana/exponential-backoff) gem
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << 'test'
6
+ end
7
+
8
+ require 'rdoc/task'
9
+ namespace :doc do
10
+ RDoc::Task.new do |rd|
11
+ rd.rdoc_dir = 'doc'
12
+ rd.rdoc_files.include('lib/**/*.rb')
13
+ end
14
+ end
15
+
16
+ task :default => :test
data/TODO.md ADDED
@@ -0,0 +1,82 @@
1
+ ## General
2
+
3
+ * _[DONE]_ Rely upon MultiJson for JSON encoding and decoding
4
+ * Support thread-safe streams
5
+
6
+ A single stream used in several threads to perform several simultaneous actions
7
+ like consuming the stream while changing its rules.
8
+
9
+ It's currently impossible due to EventMachine. A transition to Celluloid::IO and
10
+ http.rb is required to be thread-friendly.
11
+
12
+ ## Rules
13
+
14
+ * _[DONE]_ Check rule size
15
+ * _[DONE]_ Add 1 or more rules to a stream
16
+ * _[DONE]_ Delete some rules from the stream
17
+ * _[DONE]_ Get all existing rules for a stream
18
+ * _[DONE]_ Rules equality and usage in hash as keys
19
+ * Rule encoding (UTF-8 enforcement ?)
20
+ * Double check a rule supports all the syntactical and semantic restrictions
21
+ as defined by GNIP [PowerTrack Rules](http://support.gnip.com/apis/powertrack/rules.html#Restrictions)
22
+ reference documentation
23
+ * Support evolution of rules in terms of addition, removal and updates.
24
+
25
+ ## Real-time PowerTrack
26
+
27
+ * _[DONE]_ Manage persistent connection to a data stream. See
28
+ [Powertrack API reference](http://support.gnip.com/apis/powertrack/api_reference.html)
29
+ * _[DONE]_ [Consume streaming data](http://support.gnip.com/apis/consuming_streaming_data.html)
30
+ * _[DONE]_ Capture heartbeat activities
31
+ * _[DONE]_ Capture system-related activities
32
+
33
+ ## Compliance activities
34
+
35
+ See [Honoring user intent on Twitter](http://support.gnip.com/articles/honoring-user-intent-on-twitter.html)
36
+ and [Compliance Activities](http://support.gnip.com/sources/twitter/data_format.html#ComplianceActivities).
37
+
38
+ * _[DROPPED]_ Add a comply method to PowerTrack::API ?
39
+ The compliance activities are broadcast on a specific compliance stream.
40
+ * Support the Compliance Firehose stream
41
+ [Compliance Firehose Reference](http://support.gnip.com/apis/compliance_firehose/api_reference.html)
42
+
43
+ ### Account
44
+
45
+ * Protect / Unprotect account
46
+ * Delete account
47
+ * Scrub geo
48
+ * Suspend account
49
+ * Withhold account
50
+
51
+ ### Status
52
+
53
+ * Delete status
54
+ * Withhold status
55
+
56
+ ## Data formats
57
+
58
+ See [Data format](http://support.gnip.com/sources/twitter/data_format.html)
59
+
60
+ * _[DONE]_ Support Original output format
61
+ * _[DONE]_ Support Activity Stream output format
62
+ * _[DONE]_ Support raw format
63
+ *
64
+ * _[OUT]_ Manage retweets.
65
+ See [Identifying and Understanding retweets](http://support.gnip.com/articles/identifying-and-understanding-retweets.html)
66
+
67
+ ## Disconnections
68
+
69
+ See [Managing disconnections](http://support.gnip.com/articles/disconnections-explained.html)
70
+
71
+ * _[DONE]_ Reconnect after disconnect. See
72
+ [Disconnections & Reconnecting](http://support.gnip.com/apis/consuming_streaming_data.html#Disconnections)
73
+ * _[DONE]_ Reconnect using an exponential backoff pattern.
74
+ * _[DONE]_ Support Backfill
75
+ * Support Replay
76
+ * Reconnect when there's a GNIP server issue signaled by the 503 HTTP response status
77
+
78
+ ## Other features
79
+
80
+ * _[DONE]_ Support test and development streams
81
+ * Support status dashboard
82
+ * Support Historical Powertrack
@@ -0,0 +1,7 @@
1
# See http://stackoverflow.com/questions/2393697/look-up-all-descendants-of-a-class-in-ruby
class Class
  # Returns every class that inherits, directly or indirectly, from the
  # receiver. The receiver itself is not part of the result, and the order of
  # the returned classes is unspecified.
  #
  # Singleton classes are filtered out: ObjectSpace also yields the singleton
  # class of any instance that owns one, and such a class compares as
  # `< self` although it is not a real subclass and cannot be instantiated.
  def descendants
    ObjectSpace.each_object(::Class).select do |klass|
      klass < self && !klass.singleton_class?
    end
  end
end
data/lib/powertrack.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'core_ext/class.rb'
2
+
3
+ require 'powertrack/version'
4
+ require 'powertrack/errors'
5
+
6
+ require 'powertrack/rules/rule'
7
+ require 'powertrack/rules/string_extension'
8
+
9
+ require 'powertrack/streaming/api'
10
+ require 'powertrack/streaming/stream'
@@ -0,0 +1,110 @@
1
module PowerTrack
  # Base PowerTrack error, capable of wrapping another.
  #
  # The exception message is built as "<status>: <msg>", skipping whichever
  # part is blank. When no message is given, the body is used as the message.
  class BasePowerTrackError < StandardError
    # status: the status passed at construction (nil when there is none)
    # body:   the body passed at construction, if any
    attr_reader :status, :body

    def initialize(status, msg, body=nil)
      msg ||= body
      parts = [ status, msg ].map { |part| part.to_s.strip }.reject(&:empty?)
      super(parts.join(': '))
      @status = status
      @body = body
    end
  end

  # Base class for PowerTrack errors without a precise status
  class NoStatusPowerTrackError < BasePowerTrackError
    def initialize(message, body)
      super(nil, message, body)
    end
  end

  # An error which is raised when there is a connection issue with the PowerTrack
  # endpoint
  class ConnectionError < NoStatusPowerTrackError
    def initialize(message)
      super(message, nil)
    end
  end

  # Base class for PowerTrack errors with a precise status
  class WithStatusPowerTrackError < BasePowerTrackError
    # Factory method which returns an error instance based on a given status.
    #
    # Returns an instance of the subclass whose constructor pins that exact
    # status, or an UnknownStatusError carrying the given status when no
    # subclass matches (which includes a nil status).
    #
    # Relies on Class#descendants being defined (core_ext/class.rb).
    def self.build(status, message, body)
      # The lookup table is shared by the whole hierarchy through a class
      # variable, so it is always computed from the hierarchy root: computing
      # it from `self` would cache an empty table if the first call came
      # through a subclass.
      @@status_to_error_class ||=
        WithStatusPowerTrackError.descendants.each_with_object({}) do |klass, mapping|
          predefined = klass.new(nil, nil).status
          # Classes without a fixed status report nil here; they must not be
          # registered, otherwise a nil status would resolve to one of them
          # and its (status, msg, body) constructor would be misused.
          mapping[predefined] = klass unless predefined.nil?
        end

      error_class = @@status_to_error_class[status]
      if error_class
        error_class.new(message, body)
      else
        # default to unknown status error
        UnknownStatusError.new(status, message, body)
      end
    end
  end

  # An exception which is raised when the response received from PowerTrack is
  # invalid, poorly formatted in most cases.
  class InvalidResponseError < WithStatusPowerTrackError
  end

  # An exception which is raised when PowerTrack returns an unknown HTTP status code.
  class UnknownStatusError < WithStatusPowerTrackError
  end

  # Base class for errors which match a well-defined HTTP status code as
  # documented in the PowerTrack API reference.
  class PredefinedStatusPowerTrackError < WithStatusPowerTrackError
  end

  # Generally relates to poorly formatted JSON, and includes an "Invalid JSON"
  # message in the response.
  class BadRequestError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(400, message, body)
    end
  end

  # HTTP authentication failed due to invalid credentials.
  class UnauthorizedError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(401, message, body)
    end
  end

  # Generally, this occurs where your client fails to properly include the
  # headers to accept gzip encoding from the stream, but can occur in other
  # circumstances as well.
  #
  # Will contain a JSON message similar to "This connection requires
  # compression. To enable compression, send an 'Accept-Encoding: gzip' header
  # in your request and be ready to uncompress the stream as it is read on
  # the client end."
  class NotAcceptableError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(406, message, body)
    end
  end

  # Error bound to the HTTP 422 (Unprocessable Entity) status.
  class UnprocessableEntityError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(422, message, body)
    end
  end

  # Your app has exceeded the limit on connection requests.
  class RateLimitedError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(429, message, body)
    end
  end

  # Gnip server issue. If no notice about this issue has been posted on
  # status.gnip.com, email support@gnip.com.
  class ServiceUnavailableError < PredefinedStatusPowerTrackError
    def initialize(message, body)
      super(503, message, body)
    end
  end
end