powertrack 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +41 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +43 -0
- data/LICENSE.md +25 -0
- data/README.md +117 -0
- data/Rakefile +16 -0
- data/TODO.md +82 -0
- data/lib/core_ext/class.rb +7 -0
- data/lib/powertrack.rb +10 -0
- data/lib/powertrack/errors.rb +110 -0
- data/lib/powertrack/rules/rule.rb +140 -0
- data/lib/powertrack/rules/string_extension.rb +9 -0
- data/lib/powertrack/streaming/api.rb +64 -0
- data/lib/powertrack/streaming/data_buffer.rb +36 -0
- data/lib/powertrack/streaming/retrier.rb +70 -0
- data/lib/powertrack/streaming/stream.rb +429 -0
- data/lib/powertrack/version.rb +3 -0
- data/powertrack.gemspec +32 -0
- data/test/minitest_helper.rb +41 -0
- data/test/test_manage_rules.rb +30 -0
- data/test/test_rule.rb +163 -0
- data/test/test_track_stream.rb +72 -0
- metadata +202 -0
@@ -0,0 +1,140 @@
|
|
1
|
+
require 'multi_json'
|
2
|
+
|
3
|
+
module PowerTrack
|
4
|
+
# A PowerTrack rule with its components and restrictions.
|
5
|
+
class Rule

  # The maximum length of a rule tag.
  MAX_TAG_LENGTH = 255

  # The maximum length of the value of a standard rule
  MAX_STD_RULE_VALUE_LENGTH = 1024

  # The maximum length of the value of a long rule
  MAX_LONG_RULE_VALUE_LENGTH = 2048

  # The maximum number of positive terms in a single rule value
  MAX_POSITIVE_TERMS = 30

  # The maximum number of negative terms in a single rule value
  MAX_NEGATIVE_TERMS = 50

  attr_reader :value, :tag, :error

  # Builds a new rule based on a value and an optional tag.
  # By default, the constructor assesses if it's a long rule or not
  # based on the length of the value. But the 'long' feature can be
  # explicitly specified with the third parameter.
  #
  # A nil value is stored as the empty string.
  def initialize(value, tag=nil, long=nil)
    @value = value || ''
    @tag = tag
    # honour 'long' only when it is a real boolean (true/false); anything
    # else means "infer it from the length of the value"
    @long = long == !!long ? long : @value.size > MAX_STD_RULE_VALUE_LENGTH
    @error = nil
  end

  # Returns true if the rule is long.
  def long?
    @long
  end

  # Returns true if the rule is valid, false otherwise. The validation error
  # can be retrieved through the error method.
  #
  # Validators run in a fixed order and the first one that fails wins; its
  # name, humanized, becomes the error message (e.g. "Too long tag").
  def valid?
    # reset error
    @error = nil

    [ :too_long_value?,
      :too_many_positive_terms?,
      :too_many_negative_terms?,
      :contains_empty_source?,
      :contains_negated_or?,
      :too_long_tag? ].each do |validator|

      # stop when 1 validator fails
      if self.send(validator)
        @error = validator.to_s.tr('_', ' ').delete('?').capitalize
        return false
      end
    end

    true
  end

  # Dumps the rule in a valid JSON format.
  #
  # Any argument is accepted and ignored: JSON generators hand a state object
  # to #to_json when the rule is nested inside an Array or a Hash being
  # serialized, and a zero-arity method would raise ArgumentError there.
  def to_json(*_args)
    MultiJson.encode(to_hash)
  end

  # Converts the rule in a Hash. The :tag key is only present when a tag
  # has been set.
  def to_hash
    res = {:value => @value}
    res[:tag] = @tag unless @tag.nil?
    res
  end

  # Converts the rule in a string, the JSON representation of the rule actually.
  def to_s
    to_json
  end

  # Returns true when the rule is equal to the other rule provided, i.e. same
  # class, value, tag and long flag.
  def ==(other)
    other.class == self.class &&
      other.value == @value &&
      other.tag == @tag &&
      other.long? == self.long?
  end

  alias eql? ==

  # Returns a hash for the rule based on its components. Useful for using
  # rules as Hash keys.
  def hash
    # let's assume a nil value for @value or @tag is not different from the empty value
    "v:#{@value},t:#{@tag},l:#{@long}".hash
  end

  # Returns the maximum length of the rule value according to the type of the
  # rule (long or standard).
  def max_value_length
    long? ? MAX_LONG_RULE_VALUE_LENGTH : MAX_STD_RULE_VALUE_LENGTH
  end

  protected

  # Is the rule value too long ?
  def too_long_value?
    @value.size > max_value_length
  end

  # Does the rule value contain a forbidden negated OR ?
  def contains_negated_or?
    !@value[/\-\w+ OR/].nil? || !@value[/OR \-\w+/].nil?
  end

  # Does the rule value contain too many positive terms ?
  # Long rules are never rejected by this check.
  def too_many_positive_terms?
    return false if long?
    # negative look-behind; see http://www.rexegg.com/regex-disambiguation.html
    # exclude the OR operator from the terms being counted
    @value.scan(/(?<!-)(\b[\w:]+|\"[\-\s\w:]+\"\b)/).select { |match| match.first != 'OR' }.size > MAX_POSITIVE_TERMS
  end

  # Does the rule value contain too many negative terms ?
  # Long rules are never rejected by this check.
  def too_many_negative_terms?
    return false if long?
    @value.scan(/(^| )\-(\w|\([^(]*\)|\"[^"]*\")/).size > MAX_NEGATIVE_TERMS
  end

  # Does the rule value contain an empty source ?
  def contains_empty_source?
    !@value[/source\:\s/].nil?
  end

  # Is the rule tag too long ? Always a real boolean, false when no tag is set.
  def too_long_tag?
    !@tag.nil? && @tag.size > MAX_TAG_LENGTH
  end
end
|
140
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'powertrack/rules/rule'
|
2
|
+
|
3
|
+
# Extend core String class with a rule transformer
|
4
|
+
class String
|
5
|
+
# Returns a PowerTrack::Rule instance based on the value of the string.
# The optional tag and long arguments are forwarded unchanged to
# PowerTrack::Rule.new.
|
6
|
+
def to_pwtk_rule(tag=nil, long=nil)
|
7
|
+
PowerTrack::Rule.new(self, tag, long)
|
8
|
+
end
|
9
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module PowerTrack
|
2
|
+
module API
  # Adds many rules to your PowerTrack stream’s ruleset.
  #
  # <tt>POST /rules</tt>
  #
  # See http://support.gnip.com/apis/powertrack/api_reference.html#AddRules
  #
  # Must be overridden by the including class; raises NotImplementedError
  # otherwise.
  def add_rules(*rules)
    raise NotImplementedError, "#{self.class}#add_rules is not implemented"
  end

  # Adds one rule to your PowerTrack stream’s ruleset.
  #
  # <tt>POST /rules</tt>
  #
  # See http://support.gnip.com/apis/powertrack/api_reference.html#AddRules
  #
  # Delegates to add_rules.
  def add_rule(rule)
    add_rules(rule)
  end

  # Removes the specified rules from the stream.
  #
  # <tt>DELETE /rules</tt>
  #
  # See http://support.gnip.com/apis/powertrack/api_reference.html#DeleteRules
  #
  # Must be overridden by the including class; raises NotImplementedError
  # otherwise.
  def delete_rules(*rules)
    raise NotImplementedError, "#{self.class}#delete_rules is not implemented"
  end

  # Removes the specified rule from the stream.
  #
  # <tt>DELETE /rules</tt>
  #
  # See http://support.gnip.com/apis/powertrack/api_reference.html#DeleteRules
  #
  # Delegates to delete_rules.
  def delete_rule(rule)
    delete_rules(rule)
  end

  # Retrieves all existing rules for a stream.
  #
  # <tt>GET /rules</tt>
  #
  # See http://support.gnip.com/apis/powertrack/api_reference.html#ListRules
  #
  # Options:
  # o compressed: [true|false] To demand gzip-compressed response from GNIP
  #               true by default
  # o objectify: [true|false] To demand PowerTrack::Rule object as results
  #              instead of raw JSON. True by default.
  #
  # Must be overridden by the including class; raises NotImplementedError
  # otherwise.
  def list_rules(options=nil)
    raise NotImplementedError, "#{self.class}#list_rules is not implemented"
  end

  # Establishes a persistent connection to the PowerTrack data stream,
  # through which the social data will be delivered.
  #
  # <tt>GET /track/:stream</tt>
  #
  # See http://support.gnip.com/apis/powertrack/api_reference.html#Stream
  #
  # Must be overridden by the including class; raises NotImplementedError
  # otherwise.
  def track(options=nil)
    raise NotImplementedError, "#{self.class}#track is not implemented"
  end
end
|
64
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module PowerTrack
|
2
|
+
# A buffer of data received from PowerTrack. Useful for managing the sequential
|
3
|
+
# chunks of bytes sent over the stream by GNIP and slicing them into well-formatted
|
4
|
+
# messages.
|
5
|
+
class DataBuffer

  # The pattern used by GNIP PowerTrack to delimit a single message: any
  # run of bytes (possibly empty, possibly containing a lone CR or LF)
  # terminated by the first CRLF that follows it.
  MESSAGE_PATTERN = /(.*?)\r\n/m

  # Builds a new data buffer.
  def initialize
    # duplicate the literal so the buffer stays mutable even when string
    # literals are frozen
    @buffer = ''.dup
  end

  # Add a chunk of bytes to the buffer and pass the new message(s) extracted
  # to the block provided.
  #
  # Every complete (CRLF-terminated) message is removed from the buffer, with
  # or without a block; the trailing incomplete part stays buffered until a
  # later chunk completes it.
  #
  # Returns what String#gsub! returns: the remaining buffer when at least one
  # message was extracted, nil otherwise.
  def process(chunk, &block)
    @buffer.concat(chunk)
    @buffer.gsub!(MESSAGE_PATTERN) do
      yield(Regexp.last_match(1).to_s) if block_given?
      # erase the message
      ''
    end
  end

  # The current size of the buffer.
  def size
    @buffer.size
  end

  # Resets the buffer, therefore losing any bytes received from PowerTrack.
  def reset!
    @buffer = ''.dup
  end
end
|
36
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'exponential_backoff'
|
2
|
+
|
3
|
+
module PowerTrack
|
4
|
+
# A utility class that manages an exponential backoff retry pattern.
|
5
|
+
# Additionally, this kind of retrier can be reset or stopped by the code being
|
6
|
+
# retried.
|
7
|
+
class Retrier
  attr_reader :retries, :max_retries

  # the default minimum number of seconds b/w 2 attempts
  DEFAULT_MIN_INTERVAL = 1.0
  # the default maximum number of seconds to wait b/w 2 attempts
  DEFAULT_MAX_ELAPSED_TIME = 30.0
  # the default interval multiplier
  DEFAULT_INTERVAL_MULTIPLIER = 1.5
  # the default randomize factor
  DEFAULT_RANDOMIZE_FACTOR = 0.25

  # default options used by a retrier unless others specified at initialization
  DEFAULT_OPTIONS = {
    min_interval: DEFAULT_MIN_INTERVAL,
    max_elapsed_time: DEFAULT_MAX_ELAPSED_TIME,
    multiplier: DEFAULT_INTERVAL_MULTIPLIER,
    randomize_factor: DEFAULT_RANDOMIZE_FACTOR
  }

  # Builds a retrier that will retry a maximum retries number of times.
  #
  # The options (see DEFAULT_OPTIONS for the supported keys) configure the
  # underlying ExponentialBackoff; missing keys fall back to the defaults.
  def initialize(max_retries, options=nil)
    options = DEFAULT_OPTIONS.merge(options || {})

    @max_retries = max_retries
    @retries = 0
    @continue = true
    @backoff = ExponentialBackoff.new(options[:min_interval], options[:max_elapsed_time])
    @backoff.multiplier = options[:multiplier]
    @backoff.randomize_factor = options[:randomize_factor]
  end

  # Resets the retrier: the retry counter goes back to 0 and the backoff is
  # cleared.
  def reset!
    @retries = 0
    @backoff.clear
  end

  # Returns true if the retrier is currently retrying.
  def retrying?
    @retries != 0
  end

  # Stops retrying even after a reset. To be used from the code being retried.
  def stop
    @continue = false
  end

  # Retries the block of code provided according to the configuration of the
  # retrier.
  #
  # The block runs until it calls #stop or until the retry counter reaches
  # max_retries (the block may call #reset! to start counting again).
  # Returns the value of the last run of the block, nil if it never ran.
  def retry(&block)
    # TODO: manage exceptions
    res = nil

    while @continue && @retries < @max_retries
      res = yield
      break unless @continue

      @retries += 1
      # back off only when another attempt follows; sleeping after the final
      # attempt would just delay the caller
      sleep(@backoff.next_interval) if @retries < @max_retries
    end

    res
  end
end
|
70
|
+
end
|
@@ -0,0 +1,429 @@
|
|
1
|
+
require 'eventmachine'
|
2
|
+
require 'em-http-request'
|
3
|
+
require 'multi_json'
|
4
|
+
require 'void_logger'
|
5
|
+
|
6
|
+
require 'powertrack/errors'
|
7
|
+
require 'powertrack/streaming/api'
|
8
|
+
require 'powertrack/streaming/data_buffer'
|
9
|
+
require 'powertrack/streaming/retrier'
|
10
|
+
|
11
|
+
module PowerTrack
|
12
|
+
# A PowerTrack stream to be used for both updating the rules and collecting
|
13
|
+
# new messages.
|
14
|
+
class Stream
|
15
|
+
# Includes the PowerTrack Stream API
|
16
|
+
include PowerTrack::API
|
17
|
+
# Includes a logger, void by default
|
18
|
+
include VoidLogger::LoggerMixin
|
19
|
+
|
20
|
+
# The format of the URLs to connect to the various stream services
|
21
|
+
# The format of the URLs to connect to the various stream services
FEATURE_URL_FORMAT = "https://%s:%s/accounts/%s/publishers/%s/streams/track/%s%s.json".freeze

# The default timeout on a connection to PowerTrack. Can be overridden through
# the :connect_timeout stream option.
DEFAULT_CONNECTION_TIMEOUT = 30

# The default timeout for inactivity on a connection to PowerTrack. Can be
# overridden through the :inactivity_timeout stream option.
DEFAULT_INACTIVITY_TIMEOUT = 50

# The default options for using the stream. Frozen, like the other default
# option hashes of this class; it is only ever merged into a new hash.
DEFAULT_STREAM_OPTIONS = {
  connect_timeout: DEFAULT_CONNECTION_TIMEOUT,
  inactivity_timeout: DEFAULT_INACTIVITY_TIMEOUT,
  # use a client id if you want to leverage the Backfill feature
  client_id: nil
}.freeze

# The response status considered successful unless a request says otherwise.
DEFAULT_OK_RESPONSE_STATUS = 200

# the patterns used to identify the various types of message received from GNIP
# everything else is an activity
HEARTBEAT_MESSAGE_PATTERN = /\A\s*\z/
SYSTEM_MESSAGE_PATTERN = /\A\s*\{\s*"(info|warn|error)":/mi
|
44
|
+
|
45
|
+
attr_reader :username, :account_name, :data_source, :label
|
46
|
+
|
47
|
+
def initialize(username, password, account_name, data_source, label, options=nil)
  # credentials
  @username, @password = username, password
  # coordinates of the stream
  @account_name, @data_source, @label = account_name, data_source, label
  # caller-supplied options take precedence over the defaults
  @options = DEFAULT_STREAM_OPTIONS.merge(options || {})
  @client_id = @options[:client_id]
end
|
56
|
+
|
57
|
+
# Adds many rules to your PowerTrack stream’s ruleset.
|
58
|
+
#
|
59
|
+
# <tt>POST /rules</tt>
|
60
|
+
#
|
61
|
+
# See http://support.gnip.com/apis/powertrack/api_reference.html#AddRules
|
62
|
+
def add_rules(*rules)
  # rules may be given as separate arguments or wrapped in array(s)
  payload = MultiJson.encode('rules' => rules.flatten)
  # the request is successful when the response status is 201
  make_rules_request(:post, body: payload, ok: 201)
end
|
66
|
+
|
67
|
+
# Removes the specified rules from the stream.
|
68
|
+
#
|
69
|
+
# <tt>DELETE /rules</tt>
|
70
|
+
#
|
71
|
+
# See http://support.gnip.com/apis/powertrack/api_reference.html#DeleteRules
|
72
|
+
def delete_rules(*rules)
  # rules may be given as separate arguments or wrapped in array(s)
  payload = MultiJson.encode('rules' => rules.flatten)
  make_rules_request(:delete, body: payload)
end
|
76
|
+
|
77
|
+
DEFAULT_LIST_RULES_OPTIONS = { compressed: true, objectify: true }.freeze

# Retrieves all existing rules for a stream.
#
# Returns an array of PowerTrack::Rule objects when the :objectify option is
# set and the response is a Hash whose 'rules' entry is an Array of Hashes
# that all carry a 'value' key. Returns the response untouched otherwise.
#
# <tt>GET /rules</tt>
#
# See http://support.gnip.com/apis/powertrack/api_reference.html#ListRules
def list_rules(options=nil)
  options = DEFAULT_LIST_RULES_OPTIONS.merge(options || {})
  res = make_rules_request(:get, headers: gzip_compressed_header(options[:compressed]))

  # raw response unless objects are wanted and the payload has the right shape
  return res unless options[:objectify] && res.is_a?(Hash)

  rules = res['rules']
  return res unless rules.is_a?(Array)
  return res unless rules.all? { |rule| rule.is_a?(Hash) && rule.key?('value') }

  rules.map { |rule| PowerTrack::Rule.new(rule['value'], rule['tag']) }
end
|
103
|
+
|
104
|
+
DEFAULT_TRACK_OPTIONS = {
  # receive GZip-compressed payloads ?
  compressed: true,
  # max number of retries after a disconnection
  max_retries: 3,
  # advanced options to configure exponential backoff used for retries
  backoff: nil,
  # max number of seconds to wait for last message handlers to complete
  stop_timeout: 10,
  # pass message in raw form (JSON formatted string) instead of JSON-decoded
  # Ruby objects to message handlers
  raw: false,
  # called for each message received, except heartbeats
  on_message: nil,
  # called for each activity received
  on_activity: nil,
  # called for each system message received
  on_system: nil,
  # called for each heartbeat received
  on_heartbeat: nil,
  # called periodically to detect if the tracker has to be closed
  close_now: nil
}.freeze

# Establishes a persistent connection to the PowerTrack data stream,
# through which the social data will be delivered.
#
# <tt>GET /track/:stream</tt>
#
# See http://support.gnip.com/apis/powertrack/api_reference.html#Stream
#
# The options default to DEFAULT_TRACK_OPTIONS. The result of the last
# tracking attempt is passed to handle_api_response, which returns or raises
# accordingly.
def track(options=nil)
  options = DEFAULT_TRACK_OPTIONS.merge(options || {})
  # hand the :backoff option to the retrier; nil means its default settings
  retrier = PowerTrack::Retrier.new(options[:max_retries], options[:backoff])
  handle_api_response(*retrier.retry { track_once(options, retrier) })
end
|
139
|
+
|
140
|
+
private
|
141
|
+
|
142
|
+
# Returns the fully-qualified domain name of a GNIP PowerTrack server
|
143
|
+
# based on a hostname.
|
144
|
+
def gnip_server_name(hostname)
  # every PowerTrack host lives under the gnip.com domain
  "#{hostname}.gnip.com"
end
|
147
|
+
|
148
|
+
# Returns the port used by GNIP PowerTrack servers.
|
149
|
+
def gnip_server_port
|
150
|
+
# returned as a String; feature_url interpolates it into the URL
'443'
|
151
|
+
end
|
152
|
+
|
153
|
+
# Returns the URL of the stream for a given feature.
|
154
|
+
def feature_url(hostname, feature=nil)
  # an optional feature becomes an extra path segment
  suffix = feature ? "/#{feature}" : ''
  parts = [ gnip_server_name(hostname),
            gnip_server_port,
            @account_name,
            @data_source,
            @label,
            suffix ]
  url = FEATURE_URL_FORMAT % parts

  # the client id, when set, is appended as the 'client' query parameter
  @client_id ? "#{url}?client=#{@client_id}" : url
end
|
168
|
+
|
169
|
+
# Returns the HTTP header that turns on GZip-based compression if required.
|
170
|
+
# Each call returns a new hash which can be safely modified by the caller.
|
171
|
+
def gzip_compressed_header(compressed)
  # no header at all when compression is not wanted
  return {} unless compressed

  { 'accept-encoding' => 'gzip, compressed' }
end
|
174
|
+
|
175
|
+
# Returns the authorization header to join to the HTTP request.
|
176
|
+
def auth_header
  # username/password pair of the stream
  credentials = [ @username, @password ]
  { 'authorization' => credentials }
end
|
179
|
+
|
180
|
+
# Returns the connection options (timeouts) common to each PowerTrack connection.
|
181
|
+
# Each call returns a new hash which can be safely modified by the caller.
|
182
|
+
def connection_headers
  # only the timeout settings of the stream options are exposed
  [ :connect_timeout, :inactivity_timeout ].each_with_object({}) do |key, settings|
    settings[key] = @options[key]
  end
end
|
186
|
+
|
187
|
+
# Opens a new connection to GNIP PowerTrack.
|
188
|
+
def connect(hostname, feature=nil)
  # builds the request object for the feature URL with the connection settings
  EventMachine::HttpRequest.new(feature_url(hostname, feature), connection_headers)
end
|
192
|
+
|
193
|
+
# Returns the HTTP headers common to each valid PowerTrack request.
|
194
|
+
# Each call returns a new hash which can be safely modified by the caller.
|
195
|
+
def common_req_headers
|
196
|
+
# JSON is both expected in responses and sent in request bodies
{ 'accept' => 'application/json',
|
197
|
+
'content-type' => 'application/json; charset=utf-8',
|
198
|
+
# NOTE(review): :redirects looks like an em-http-request request option
# rather than an HTTP header; it is merged into the header hash here --
# confirm it has the intended effect.
:redirects => 3 }.merge(auth_header)
|
199
|
+
end
|
200
|
+
|
201
|
+
# Returns the HTTP headers common to each valid /rules request.
|
202
|
+
# Each call returns a new hash which can be safely modified by the caller.
|
203
|
+
def rules_req_headers
|
204
|
+
# /rules requests need nothing beyond the common request headers
common_req_headers
|
205
|
+
end
|
206
|
+
|
207
|
+
# Parses a JSON-formatted body received as the response of a PowerTrack API
|
208
|
+
# request.
|
209
|
+
#
|
210
|
+
# Returns nil when the body is empty, the Ruby object decoded from the
|
211
|
+
# JSON-formatted body otherwise.
|
212
|
+
#
|
213
|
+
# If the parsing fails, returns the value returned by the given block which
|
214
|
+
# is called with the raised exception as a single argument. If no block is
|
215
|
+
# given, returns the stripped textual body.
|
216
|
+
def parse_json_body(body, &block)
  text = (body || '').strip
  # nothing to parse
  return nil if text == ''

  begin
    MultiJson.load(text)
  rescue => failure
    # let the caller decide what a parsing failure means; fall back to the
    # stripped text when no block was given
    block_given? ? yield(failure) : text
  end
end
|
228
|
+
|
229
|
+
# Returns an appropriate return value or exception according to the response
|
230
|
+
# obtained on an API request.
|
231
|
+
def handle_api_response(status, error, body, ok=DEFAULT_OK_RESPONSE_STATUS)
  # no status at all: connection issue
  raise PowerTrack::ConnectionError.new(error) if status.nil?

  # any status other than the expected one is reported with its (parsed) body;
  # === keeps the matching semantics of a case/when on the expected status
  unless ok === status
    raise PowerTrack::WithStatusPowerTrackError.build(status, error, parse_json_body(body))
  end

  # successful call: return the body unless there isn't any
  return nil if body.nil?

  parse_json_body(body) do |exception|
    # invalid JSON response
    raise PowerTrack::InvalidResponseError.new(ok, exception.message, body)
  end
end
|
249
|
+
|
250
|
+
# Frozen (nested headers hash included), like the other default option hashes
# of this class: the defaults are only ever merged into a new hash.
DEFAULT_RULES_REQUEST_OPTIONS = {
  ok: DEFAULT_OK_RESPONSE_STATUS,
  headers: {}.freeze,
  body: nil
}.freeze

# Makes a rules-related request with a specific HTTP verb and a few options.
# Returns the response if successful or an exception if the request failed.
#
# Options:
# o ok: the response status that denotes a success
# o headers: extra HTTP headers, merged over the common /rules headers
# o body: the body of the request, if any
def make_rules_request(verb, options=nil)
  options = DEFAULT_RULES_REQUEST_OPTIONS.merge(options || {})
  resp_status = nil
  resp_error = nil
  resp_body = nil

  # the reactor only lives for the duration of this single request
  EM.run do
    con = connect('api', 'rules')
    http = con.setup_request(verb,
      head: rules_req_headers.merge(options[:headers]),
      body: options[:body])

    http.errback do
      # resp_status stays nil, which handle_api_response reports as a
      # connection issue
      resp_error = http.error
      EM.stop
    end

    http.callback do
      resp_status = http.response_header.status
      resp_error = http.error
      resp_body = http.response
      EM.stop
    end
  end

  handle_api_response(resp_status, resp_error, resp_body, options[:ok])
end
|
285
|
+
|
286
|
+
# Returns the type of message received on the stream: :heartbeat, :system,
|
287
|
+
# or :activity for everything else.
|
288
|
+
def message_type(message)
  # === mirrors case/when matching: anything that is not text matches no pattern
  return :heartbeat if HEARTBEAT_MESSAGE_PATTERN === message
  return :system if SYSTEM_MESSAGE_PATTERN === message

  # whatever is neither a heartbeat nor a system message is an activity
  :activity
end
|
296
|
+
|
297
|
+
# Returns the HTTP headers for each valid /track request.
|
298
|
+
# Each call returns a new hash which can be safely modified by the caller.
|
299
|
+
def track_req_headers(compressed)
  headers = common_req_headers
  # the tracking connection is meant to stay open
  headers = headers.merge('connection' => 'keep-alive')
  # compression header last, only present when requested
  headers.merge(gzip_compressed_header(compressed))
end
|
303
|
+
|
304
|
+
# Connects to the /track endpoint and manages reconnections when being
|
305
|
+
# disconnected.
|
306
|
+
def track_once(options, retrier)
  logger.info "Starting tracker for retry ##{retrier.retries}..."
  stop_timeout = options[:stop_timeout]
  on_heartbeat = options[:on_heartbeat]
  on_message = options[:on_message]
  on_activity = options[:on_activity]
  # handler for system messages; without this local the dispatch below
  # cannot resolve on_system when a system message arrives
  on_system = options[:on_system]
  close_now = options[:close_now] || lambda { false }

  buffer = PowerTrack::DataBuffer.new
  closed = false
  disconnected = false
  resp_status = DEFAULT_OK_RESPONSE_STATUS
  resp_error = nil
  resp_body = nil

  EM.run do
    logger.info "Starting the reactor..."
    con = connect('stream')
    http = con.get(head: track_req_headers(options[:compressed]))

    # polls to see if the connection should be closed
    close_watcher = EM.add_periodic_timer(1) do
      # exit if required
      if close_now.call
        logger.info "Time to close the tracker"
        closed = true
        close_watcher.cancel
        con.close
      end
    end

    # simulate periodic disconnections
    if options[:fake_disconnections]
      EM.add_timer(rand(options[:fake_disconnections])) do
        con.close
      end
    end

    http.stream do |chunk|
      # ignore data if already disconnected, thus avoiding synchronizing the
      # buffer. Nevertheless, this should never happen...
      if disconnected
        logger.warn "Message received while already disconnected"
        next
      end

      # reset retries when some (valid) data are received
      if retrier.retrying?
        logger.info "Resetting retries..."
        retrier.reset!
      end

      # process the chunk
      buffer.process(chunk) do |raw|
        logger.debug "New message received"
        # handlers are run through EM.defer rather than inline in the
        # stream callback
        EM.defer do
          # select the right message handler(s) according to the message type
          m_type = message_type(raw)

          if m_type == :heartbeat
            on_heartbeat.call if on_heartbeat
          else
            # JSON decoding if required
            message = options[:raw] ? raw : MultiJson.decode(raw)

            on_message.call(message) if on_message

            case m_type
            when :system then on_system.call(message) if on_system
            when :activity then on_activity.call(message) if on_activity
            end
          end

          # TODO: manage exceptions at this level
        end
      end
    end

    # reconnection on error; registered below as both the success and the
    # error callback of the request
    reconnect_cb = lambda do |http_client|
      logger.info "Disconnected after #{retrier.retries} retries"
      disconnected = true

      if closed
        # close immediately if required
        wait_til_defers_finish_and_stop(stop_timeout)
        # tell the retrier the tracking is over
        retrier.stop
      else
        # cancel the periodic close watcher
        close_watcher.cancel

        resp_status = http_client.response_header.status || DEFAULT_OK_RESPONSE_STATUS
        resp_error = http_client.error
        resp_body = http_client.response
        wait_til_defers_finish_and_stop(stop_timeout)
      end
    end

    http.callback(&reconnect_cb)
    http.errback(&reconnect_cb)
  end

  # the triplet expected by handle_api_response
  [ resp_status, resp_error, resp_body ]
end
|
413
|
+
|
414
|
+
# Waits for all the deferrable threads to complete, then stops the reactor.
|
415
|
+
def wait_til_defers_finish_and_stop(timeout)
  # wait for defers to terminate but no more than timeout...
  start = Time.now
  defers_waiter = EM.add_periodic_timer(0.2) do
    logger.info "Waiting for defers..."
    if EM.defers_finished? || (Time.now - start) > timeout
      defers_waiter.cancel
      # stop the reactor from inside the timer, i.e. only once the wait is
      # over; stopping it right after scheduling the timer would not wait
      logger.info "Stopping the reactor..."
      EM.stop
    end
  end
end
|
428
|
+
end
|
429
|
+
end
|