powertrack 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +41 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +43 -0
- data/LICENSE.md +25 -0
- data/README.md +117 -0
- data/Rakefile +16 -0
- data/TODO.md +82 -0
- data/lib/core_ext/class.rb +7 -0
- data/lib/powertrack.rb +10 -0
- data/lib/powertrack/errors.rb +110 -0
- data/lib/powertrack/rules/rule.rb +140 -0
- data/lib/powertrack/rules/string_extension.rb +9 -0
- data/lib/powertrack/streaming/api.rb +64 -0
- data/lib/powertrack/streaming/data_buffer.rb +36 -0
- data/lib/powertrack/streaming/retrier.rb +70 -0
- data/lib/powertrack/streaming/stream.rb +429 -0
- data/lib/powertrack/version.rb +3 -0
- data/powertrack.gemspec +32 -0
- data/test/minitest_helper.rb +41 -0
- data/test/test_manage_rules.rb +30 -0
- data/test/test_rule.rb +163 -0
- data/test/test_track_stream.rb +72 -0
- metadata +202 -0
@@ -0,0 +1,140 @@
|
|
1
|
+
require 'multi_json'
|
2
|
+
|
3
|
+
module PowerTrack
  # A PowerTrack rule with its components and restrictions.
  #
  # A rule is made of a value (the filtering expression), an optional tag and
  # a "long" flag which selects the maximum length allowed for the value.
  class Rule

    # The maximum length of a rule tag.
    MAX_TAG_LENGTH = 255

    # The maximum length of the value of a standard rule
    MAX_STD_RULE_VALUE_LENGTH = 1024

    # The maximum length of the value of a long rule
    MAX_LONG_RULE_VALUE_LENGTH = 2048

    # The maximum number of positive terms in a single rule value
    MAX_POSITIVE_TERMS = 30

    # The maximum number of negative terms in a single rule value
    MAX_NEGATIVE_TERMS = 50

    # The validators run by #valid?, in order. Each one names a predicate
    # method that returns a truthy value when the rule is INVALID.
    VALIDATORS = [ :too_long_value?,
                   :too_many_positive_terms?,
                   :too_many_negative_terms?,
                   :contains_empty_source?,
                   :contains_negated_or?,
                   :too_long_tag? ].freeze

    attr_reader :value, :tag, :error

    # Builds a new rule based on a value and an optional tag.
    # By default, the constructor assesses if it's a long rule or not
    # based on the length of the value. But the 'long' feature can be
    # explicitly specified with the third parameter.
    #
    # A nil value is stored as the empty string.
    def initialize(value, tag=nil, long=nil)
      @value = value || ''
      @tag = tag
      # honor 'long' only when it is a real boolean (true or false); any other
      # value (nil included) means "guess from the length of the value"
      @long = long == !!long ? long : @value.size > MAX_STD_RULE_VALUE_LENGTH
      @error = nil
    end

    # Returns true if the rule is long.
    def long?
      @long
    end

    # Returns true if the rule is valid, false otherwise. The validation error
    # can be retrieved through the error method.
    #
    # Validation stops at the first failing validator; the error message is
    # derived from the name of that validator (e.g. "Too long tag").
    def valid?
      # reset error
      @error = nil

      # validators are protected methods, hence the use of send
      failed = VALIDATORS.find { |validator| send(validator) }
      return true if failed.nil?

      @error = failed.to_s.tr('_', ' ').delete('?').capitalize
      false
    end

    # Dumps the rule in a valid JSON format.
    #
    # Optional arguments are accepted and ignored so that JSON generators
    # calling to_json(state) on nested objects (e.g. when a rule is an element
    # of an encoded array) do not fail with an ArgumentError.
    def to_json(*_args)
      MultiJson.encode(to_hash)
    end

    # Converts the rule into a Hash. The tag is only included when set.
    def to_hash
      res = {:value => @value}
      res[:tag] = @tag unless @tag.nil?
      res
    end

    # Converts the rule into a string, the JSON representation of the rule actually.
    def to_s
      to_json
    end

    # Returns true when the rule is equal to the other rule provided, i.e. when
    # both are of the same class and share the same value, tag and long flag.
    def ==(other)
      other.class == self.class &&
        other.value == @value &&
        other.tag == @tag &&
        other.long? == self.long?
    end

    alias eql? ==

    # Returns a hash for the rule based on its components. Useful for using
    # rules as Hash keys.
    def hash
      # a nil tag and an empty tag produce the same hash; that is only a
      # collision since == still tells them apart
      "v:#{@value},t:#{@tag},l:#{@long}".hash
    end

    # Returns the maximum length of the rule value according to the type of the
    # rule (long or standard).
    def max_value_length
      long? ? MAX_LONG_RULE_VALUE_LENGTH : MAX_STD_RULE_VALUE_LENGTH
    end

    protected

    # Is the rule value too long ?
    def too_long_value?
      @value.size > max_value_length
    end

    # Does the rule value contain a forbidden negated OR ?
    def contains_negated_or?
      !@value[/\-\w+ OR/].nil? || !@value[/OR \-\w+/].nil?
    end

    # Does the rule value contain too many positive terms ?
    # Never the case for a long rule.
    def too_many_positive_terms?
      return false if long?
      # negative look-behind; see http://www.rexegg.com/regex-disambiguation.html
      # exclude the OR operator from the terms being counted
      terms = @value.scan(/(?<!-)(\b[\w:]+|\"[\-\s\w:]+\"\b)/)
      terms.count { |match| match.first != 'OR' } > MAX_POSITIVE_TERMS
    end

    # Does the rule value contain too many negative terms ?
    # Never the case for a long rule.
    def too_many_negative_terms?
      return false if long?
      @value.scan(/(^| )\-(\w|\([^(]*\)|\"[^"]*\")/).size > MAX_NEGATIVE_TERMS
    end

    # Does the rule value contain an empty source ?
    def contains_empty_source?
      !@value[/source\:\s/].nil?
    end

    # Is the rule tag too long ? Always false when the rule has no tag.
    def too_long_tag?
      !@tag.nil? && @tag.size > MAX_TAG_LENGTH
    end
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'powertrack/rules/rule'
|
2
|
+
|
3
|
+
# Reopens the core String class to add a shortcut that turns a string into a
# PowerTrack rule.
class String
  # Builds a PowerTrack::Rule whose value is this string.
  #
  # The optional tag and long arguments are handed over, untouched, to
  # PowerTrack::Rule.new as its second and third arguments.
  def to_pwtk_rule(tag = nil, long = nil)
    ::PowerTrack::Rule.new(self, tag, long)
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
module PowerTrack
  # The contract of a PowerTrack stream. Every operation declared here raises
  # NotImplementedError unless the including class overrides it; the
  # single-rule helpers simply delegate to their multi-rule counterparts.
  module API
    # Adds many rules to your PowerTrack stream’s ruleset.
    #
    # <tt>POST /rules</tt>
    #
    # See http://support.gnip.com/apis/powertrack/api_reference.html#AddRules
    def add_rules(*rules)
      raise(NotImplementedError)
    end

    # Adds one rule to your PowerTrack stream’s ruleset, by delegating to
    # add_rules.
    #
    # <tt>POST /rules</tt>
    #
    # See http://support.gnip.com/apis/powertrack/api_reference.html#AddRules
    def add_rule(rule)
      add_rules(rule)
    end

    # Removes the specified rules from the stream.
    #
    # <tt>DELETE /rules</tt>
    #
    # See http://support.gnip.com/apis/powertrack/api_reference.html#DeleteRules
    def delete_rules(*rules)
      raise(NotImplementedError)
    end

    # Removes the specified rule from the stream, by delegating to
    # delete_rules.
    #
    # <tt>DELETE /rules</tt>
    #
    # See http://support.gnip.com/apis/powertrack/api_reference.html#DeleteRules
    def delete_rule(rule)
      delete_rules(rule)
    end

    # Retrieves all existing rules for a stream.
    #
    # <tt>GET /rules</tt>
    #
    # See http://support.gnip.com/apis/powertrack/api_reference.html#ListRules
    #
    # Options:
    # o compressed: [true|false] To demand gzip-compressed response from GNIP
    #               true by default
    # o objectify:  [true|false] To demand PowerTrack::Rule object as results
    #               instead of raw JSON. True by default.
    def list_rules(options = nil)
      raise(NotImplementedError)
    end

    # Establishes a persistent connection to the PowerTrack data stream,
    # through which the social data will be delivered.
    #
    # <tt>GET /track/:stream</tt>
    #
    # See http://support.gnip.com/apis/powertrack/api_reference.html#Stream
    def track(options = nil)
      raise(NotImplementedError)
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module PowerTrack
  # Accumulates the chunks of bytes received from a PowerTrack stream and cuts
  # them into complete messages, a message being terminated by a CR+LF pair.
  class DataBuffer

    # The pattern used by GNIP PowerTrack to delimit a single message: a run
    # of non-CR characters (captured) followed by CR+LF.
    MESSAGE_PATTERN = /^([^\r]*)\r\n/m

    # Builds a new, empty data buffer.
    def initialize
      @buffer = ''
    end

    # Appends a chunk of bytes to the buffer, then hands each complete message
    # found in the buffer (without its CR+LF terminator) to the given block.
    # Complete messages are removed from the buffer, with or without a block;
    # an incomplete trailing message stays in the buffer until a later chunk
    # completes it.
    #
    # Returns nil when no complete message was found, the remaining buffer
    # otherwise.
    def process(chunk, &block)
      @buffer.concat(chunk)
      @buffer.gsub!(MESSAGE_PATTERN) do
        message = Regexp.last_match(1).to_s
        block.call(message) if block
        # replace the message by nothing, i.e. erase it from the buffer
        ''
      end
    end

    # The current size of the buffer.
    def size
      @buffer.size
    end

    # Resets the buffer, therefore losing any bytes received from PowerTrack
    # and not processed yet.
    def reset!
      @buffer = ''
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'exponential_backoff'
|
2
|
+
|
3
|
+
module PowerTrack
  # A utility class that manages an exponential backoff retry pattern.
  # Additionally, this kind of retrier can be reset or stopped by the code being
  # retried.
  class Retrier
    attr_reader :retries, :max_retries

    # the default minimum number of seconds b/w 2 attempts
    DEFAULT_MIN_INTERVAL = 1.0
    # the default maximum number of seconds to wait b/w 2 attempts
    DEFAULT_MAX_ELAPSED_TIME = 30.0
    # the default interval multiplier
    DEFAULT_INTERVAL_MULTIPLIER = 1.5
    # the default randomize factor
    DEFAULT_RANDOMIZE_FACTOR = 0.25

    # default options used by a retrier unless others specified at initialization
    # (frozen: always merged into a new hash, never modified in place)
    DEFAULT_OPTIONS = {
      min_interval: DEFAULT_MIN_INTERVAL,
      max_elapsed_time: DEFAULT_MAX_ELAPSED_TIME,
      multiplier: DEFAULT_INTERVAL_MULTIPLIER,
      randomize_factor: DEFAULT_RANDOMIZE_FACTOR
    }.freeze

    # Builds a retrier that will run the retried code at most max_retries times.
    #
    # The options (all optional, see DEFAULT_OPTIONS) configure the exponential
    # backoff: :min_interval, :max_elapsed_time, :multiplier and
    # :randomize_factor.
    def initialize(max_retries, options=nil)
      options = DEFAULT_OPTIONS.merge(options || {})

      @max_retries = max_retries
      @retries = 0
      @continue = true
      @backoff = ExponentialBackoff.new(options[:min_interval], options[:max_elapsed_time])
      @backoff.multiplier = options[:multiplier]
      @backoff.randomize_factor = options[:randomize_factor]
    end

    # Resets the retrier: the retry counter goes back to 0 and the backoff is
    # cleared.
    def reset!
      @retries = 0
      @backoff.clear
    end

    # Returns true if the retrier is currently retrying.
    def retrying?
      @retries != 0
    end

    # Stops retrying even after a reset. To be used from the code being retried.
    def stop
      @continue = false
    end

    # Runs the block of code provided again and again until either the retrier
    # is stopped (see #stop) or the retry counter reaches the maximum number
    # of retries. The counter can be reset by the block itself (see #reset!).
    #
    # Sleeps for the next backoff interval between two runs of the block, but
    # not after the last one since no further attempt will follow.
    #
    # Returns the value returned by the last run of the block, nil if the block
    # was never run.
    def retry(&block)
      # TODO: manage exceptions
      res = nil

      while @continue && @retries < @max_retries
        res = yield
        # the block may have stopped the retrier
        break unless @continue

        @retries += 1
        # waiting is pointless when the attempts are exhausted
        sleep(@backoff.next_interval) if @retries < @max_retries
      end

      res
    end
  end
end
|
@@ -0,0 +1,429 @@
|
|
1
|
+
require 'eventmachine'
|
2
|
+
require 'em-http-request'
|
3
|
+
require 'multi_json'
|
4
|
+
require 'void_logger'
|
5
|
+
|
6
|
+
require 'powertrack/errors'
|
7
|
+
require 'powertrack/streaming/api'
|
8
|
+
require 'powertrack/streaming/data_buffer'
|
9
|
+
require 'powertrack/streaming/retrier'
|
10
|
+
|
11
|
+
module PowerTrack
  # A PowerTrack stream to be used for both updating the rules and collecting
  # new messages.
  class Stream
    # Includes the PowerTrack Stream API
    include PowerTrack::API
    # Includes a logger, void by default
    include VoidLogger::LoggerMixin

    # The format of the URLs to connect to the various stream services
    FEATURE_URL_FORMAT = "https://%s:%s/accounts/%s/publishers/%s/streams/track/%s%s.json".freeze

    # The default timeout on a connection to PowerTrack. Can be overridden per call.
    DEFAULT_CONNECTION_TIMEOUT = 30

    # The default timeout for inactivity on a connection to PowerTrack. Can be
    # overridden per call.
    DEFAULT_INACTIVITY_TIMEOUT = 50

    # The default options for using the stream.
    DEFAULT_STREAM_OPTIONS = {
      connect_timeout: DEFAULT_CONNECTION_TIMEOUT,
      inactivity_timeout: DEFAULT_INACTIVITY_TIMEOUT,
      # use a client id if you want to leverage the Backfill feature
      client_id: nil
    }.freeze

    # The HTTP status expected on a successful request, unless specified otherwise.
    DEFAULT_OK_RESPONSE_STATUS = 200

    # the patterns used to identify the various types of message received from GNIP
    # everything else is an activity
    HEARTBEAT_MESSAGE_PATTERN = /\A\s*\z/
    SYSTEM_MESSAGE_PATTERN = /\A\s*\{\s*"(info|warn|error)":/mi

    attr_reader :username, :account_name, :data_source, :label

    # Builds a stream for a given account, data source (publisher) and stream
    # label, using the username and password provided as credentials.
    #
    # Options (see DEFAULT_STREAM_OPTIONS): :connect_timeout,
    # :inactivity_timeout and :client_id.
    def initialize(username, password, account_name, data_source, label, options=nil)
      @username = username
      @password = password
      @account_name = account_name
      @data_source = data_source
      @label = label
      @options = DEFAULT_STREAM_OPTIONS.merge(options || {})
      @client_id = @options[:client_id]
    end

    # Adds many rules to your PowerTrack stream’s ruleset.
    #
    # <tt>POST /rules</tt>
    #
    # See http://support.gnip.com/apis/powertrack/api_reference.html#AddRules
    def add_rules(*rules)
      # flatten the rules in case it was provided as an array
      make_rules_request(:post, body: MultiJson.encode('rules' => rules.flatten), ok: 201)
    end

    # Removes the specified rules from the stream.
    #
    # <tt>DELETE /rules</tt>
    #
    # See http://support.gnip.com/apis/powertrack/api_reference.html#DeleteRules
    def delete_rules(*rules)
      # flatten the rules in case it was provided as an array
      make_rules_request(:delete, body: MultiJson.encode('rules' => rules.flatten))
    end

    # The default options used when listing the rules.
    DEFAULT_LIST_RULES_OPTIONS = {
      compressed: true,
      objectify: true
    }.freeze

    # Retrieves all existing rules for a stream.
    #
    # Returns an array of PowerTrack::Rule objects when the response permits so,
    # i.e. when the :objectify option is on and the decoded response is a Hash
    # whose 'rules' entry is an array of hashes having all a 'value'. Returns
    # the decoded response as is otherwise.
    #
    # <tt>GET /rules</tt>
    #
    # See http://support.gnip.com/apis/powertrack/api_reference.html#ListRules
    def list_rules(options=nil)
      options = DEFAULT_LIST_RULES_OPTIONS.merge(options || {})
      res = make_rules_request(:get, headers: gzip_compressed_header(options[:compressed]))

      # return Rule objects when required and feasible/appropriate
      if options[:objectify] &&
         res.is_a?(Hash) &&
         (rules = res['rules']).is_a?(Array) &&
         rules.all? { |rule| rule.is_a?(Hash) && rule.key?('value') }
        rules.map { |rule| PowerTrack::Rule.new(rule['value'], rule['tag']) }
      else
        res
      end
    end

    # The default options used when tracking the stream.
    DEFAULT_TRACK_OPTIONS = {
      # receive GZip-compressed payloads ?
      compressed: true,
      # max number of retries after a disconnection
      max_retries: 3,
      # advanced options to configure exponential backoff used for retries
      backoff: nil,
      # max number of seconds to wait for last message handlers to complete
      stop_timeout: 10,
      # pass message in raw form (JSON formatted string) instead of JSON-decoded
      # Ruby objects to message handlers
      raw: false,
      # called for each message received, except heartbeats
      on_message: nil,
      # called for each activity received
      on_activity: nil,
      # called for each system message received
      on_system: nil,
      # called for each heartbeat received
      on_heartbeat: nil,
      # called periodically to detect if the tracked has to be closed
      close_now: nil,
      # when set to a number N, closes the connection after a random delay
      # picked with rand(N) to simulate a disconnection
      fake_disconnections: nil
    }.freeze

    # Establishes a persistent connection to the PowerTrack data stream,
    # through which the social data will be delivered.
    #
    # See DEFAULT_TRACK_OPTIONS for the options supported. The :backoff option,
    # when provided, is passed to the retrier to configure its exponential
    # backoff.
    #
    # <tt>GET /track/:stream</tt>
    #
    # See http://support.gnip.com/apis/powertrack/api_reference.html#Stream
    def track(options=nil)
      options = DEFAULT_TRACK_OPTIONS.merge(options || {})
      retrier = PowerTrack::Retrier.new(options[:max_retries], options[:backoff])
      handle_api_response(*retrier.retry { track_once(options, retrier) })
    end

    private

    # Returns the fully-qualified domain name of a GNIP PowerTrack server
    # based on a hostname.
    def gnip_server_name(hostname)
      "%s.gnip.com" % [ hostname ]
    end

    # Returns the port used by GNIP PowerTrack servers.
    def gnip_server_port
      '443'
    end

    # Returns the URL of the stream for a given feature. When a client id was
    # specified at initialization, it is appended as the 'client' query
    # parameter.
    def feature_url(hostname, feature=nil)
      feature = feature ? "/#{feature}" : ''
      _url = FEATURE_URL_FORMAT %
              [ gnip_server_name(hostname),
                gnip_server_port,
                @account_name,
                @data_source,
                @label,
                feature ]

      _url += "?client=#{@client_id}" if @client_id

      _url
    end

    # Returns the HTTP header that turns on GZip-based compression if required.
    # Each call returns a new hash which can be safely modified by the caller.
    def gzip_compressed_header(compressed)
      compressed ? { 'accept-encoding' => 'gzip, compressed' } : {}
    end

    # Returns the authorization header to join to the HTTP request.
    def auth_header
      { 'authorization' => [ @username, @password ] }
    end

    # Returns the connection options (timeouts) common to each PowerTrack
    # connection.
    # Each call returns a new hash which can be safely modified by the caller.
    def connection_headers
      { connect_timeout: @options[:connect_timeout],
        inactivity_timeout: @options[:inactivity_timeout] }
    end

    # Opens a new connection to GNIP PowerTrack.
    def connect(hostname, feature=nil)
      url = feature_url(hostname, feature)
      EventMachine::HttpRequest.new(url, connection_headers)
    end

    # Returns the HTTP headers common to each valid PowerTrack request.
    # Each call returns a new hash which can be safely modified by the caller.
    def common_req_headers
      { 'accept' => 'application/json',
        'content-type' => 'application/json; charset=utf-8',
        :redirects => 3 }.merge(auth_header)
    end

    # Returns the HTTP headers common to each valid /rules request.
    # Each call returns a new hash which can be safely modified by the caller.
    def rules_req_headers
      common_req_headers
    end

    # Parses a JSON-formatted body received as the response of a PowerTrack API
    # request.
    #
    # Returns nil when the body is empty (or nil, or blank), the Ruby object
    # decoded from the JSON-formatted body otherwise.
    #
    # If the parsing fails, returns the value returned by the given block which
    # is called with the exception raised by the parser as a single argument.
    # If no block is given, returns the (stripped) textual body.
    def parse_json_body(body, &block)
      body = (body || '').strip
      begin
        body == '' ? nil : MultiJson.load(body)
      rescue StandardError => e
        block_given? ? yield(e) : body
      end
    end

    # Returns an appropriate return value or exception according to the response
    # obtained on an API request.
    #
    # o no status at all: raises a PowerTrack::ConnectionError
    # o status equal to ok: returns the decoded body, nil when there is none;
    #   raises a PowerTrack::InvalidResponseError when the body is not valid JSON
    # o any other status: raises the error built by
    #   PowerTrack::WithStatusPowerTrackError.build
    def handle_api_response(status, error, body, ok=DEFAULT_OK_RESPONSE_STATUS)
      case status
      when nil
        # connection issue
        raise PowerTrack::ConnectionError.new(error)
      when ok
        # successful call: return the body unless there isn't any
        return nil if body.nil?

        parse_json_body(body) do |exception|
          # invalid JSON response
          raise PowerTrack::InvalidResponseError.new(ok, exception.message, body)
        end
      else
        # specified response status
        raise PowerTrack::WithStatusPowerTrackError.build(status, error, parse_json_body(body))
      end
    end

    # The default options of a rules-related request.
    DEFAULT_RULES_REQUEST_OPTIONS = {
      ok: DEFAULT_OK_RESPONSE_STATUS,
      headers: {}.freeze,
      body: nil
    }.freeze

    # Makes a rules-related request with a specific HTTP verb and a few options.
    # Returns the response if successful or an exception if the request failed.
    #
    # Runs its own EventMachine reactor, which is stopped as soon as the
    # request succeeds or fails.
    def make_rules_request(verb, options=nil)
      options = DEFAULT_RULES_REQUEST_OPTIONS.merge(options || {})
      resp_status = nil
      resp_error = nil
      resp_body = nil

      EM.run do
        con = connect('api', 'rules')
        http = con.setup_request(verb,
                 head: rules_req_headers.merge(options[:headers]),
                 body: options[:body])

        http.errback do
          resp_error = http.error
          EM.stop
        end

        http.callback do
          resp_status = http.response_header.status
          resp_error = http.error
          resp_body = http.response
          EM.stop
        end
      end

      handle_api_response(resp_status, resp_error, resp_body, options[:ok])
    end

    # Returns the type of message received on the stream: :heartbeat, :system
    # or, for anything else, :activity.
    def message_type(message)
      case message
      when HEARTBEAT_MESSAGE_PATTERN then :heartbeat
      when SYSTEM_MESSAGE_PATTERN then :system
      else
        :activity
      end
    end

    # Returns the HTTP headers for each valid /track request.
    # Each call returns a new hash which can be safely modified by the caller.
    def track_req_headers(compressed)
      common_req_headers.merge('connection' => 'keep-alive')
                        .merge(gzip_compressed_header(compressed))
    end

    # Connects to the /track endpoint and dispatches the messages received to
    # the handlers specified in the options until the connection terminates,
    # either on purpose (close_now handler) or not (disconnection).
    #
    # Called by the retrier which is reset when data are received and stopped
    # when the stream is closed on purpose.
    #
    # Returns the status, the error and the body of the HTTP response as an
    # array.
    def track_once(options, retrier)
      logger.info "Starting tracker for retry ##{retrier.retries}..."
      stop_timeout = options[:stop_timeout]
      on_heartbeat = options[:on_heartbeat]
      on_message = options[:on_message]
      on_activity = options[:on_activity]
      # must be a local variable: the deferred block below refers to it
      on_system = options[:on_system]
      close_now = options[:close_now] || lambda { false }

      buffer = PowerTrack::DataBuffer.new
      closed = false
      disconnected = false
      resp_status = DEFAULT_OK_RESPONSE_STATUS
      resp_error = nil
      resp_body = nil

      EM.run do
        logger.info "Starting the reactor..."
        con = connect('stream')
        http = con.get(head: track_req_headers(options[:compressed]))

        # polls to see if the connection should be closed
        close_watcher = EM.add_periodic_timer(1) do
          # exit if required
          if close_now.call
            logger.info "Time to close the tracker"
            closed = true
            close_watcher.cancel
            con.close
          end
        end

        # simulate periodic disconnections
        if options[:fake_disconnections]
          EM.add_timer(rand(options[:fake_disconnections])) do
            con.close
          end
        end

        http.stream do |chunk|
          # ignore data if already disconnected, thus avoiding synchronizing the
          # buffer. Nevertheless, this should never happen...
          if disconnected
            logger.warn "Message received while already disconnected"
            next
          end

          # reset retries when some (valid) data are received
          if retrier.retrying?
            logger.info "Resetting retries..."
            retrier.reset!
          end

          # process the chunk
          buffer.process(chunk) do |raw|
            logger.debug "New message received"
            EM.defer do
              # select the right message handler(s) according to the message type
              m_type = message_type(raw)

              if m_type == :heartbeat
                on_heartbeat.call if on_heartbeat
              else
                # JSON decoding if required
                message = options[:raw] ? raw : MultiJson.decode(raw)

                on_message.call(message) if on_message

                case m_type
                when :system then on_system.call(message) if on_system
                when :activity then on_activity.call(message) if on_activity
                end
              end

              # TODO: manage exceptions at this level
            end
          end
        end

        # reconnection on error
        reconnect_cb = lambda do |http_client|
          logger.info "Disconnected after #{retrier.retries} retries"
          disconnected = true

          if closed
            # close immediately if required
            wait_til_defers_finish_and_stop(stop_timeout)
            # tell the retrier the tracking is over
            retrier.stop
          else
            # cancel the periodic close watcher
            close_watcher.cancel

            resp_status = http_client.response_header.status || DEFAULT_OK_RESPONSE_STATUS
            resp_error = http_client.error
            resp_body = http_client.response
            wait_til_defers_finish_and_stop(stop_timeout)
          end
        end

        http.callback(&reconnect_cb)
        http.errback(&reconnect_cb)
      end

      [ resp_status, resp_error, resp_body ]
    end

    # Waits for all the deferrable threads to complete, then stops the reactor.
    #
    # The wait is driven by a periodic timer, so this method returns at once;
    # the reactor is stopped from the timer as soon as the defers are finished
    # or the timeout (in seconds) is exceeded.
    def wait_til_defers_finish_and_stop(timeout)
      # wait for defers to terminate but no more than timeout...
      start = Time.now
      defers_waiter = EM.add_periodic_timer(0.2) do
        logger.info "Waiting for defers..."
        if EM.defers_finished? || (Time.now - start) > timeout
          defers_waiter.cancel
          # stop only now: stopping earlier would prevent the timer from ever
          # firing, hence would not wait for the message handlers at all
          logger.info "Stopping the reactor..."
          EM.stop
        end
      end
    end
  end
end
|