powertrack 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/Gemfile.lock +1 -1
- data/History.txt +6 -0
- data/README.md +29 -3
- data/TODO.md +24 -4
- data/lib/powertrack/rules/rule.rb +23 -7
- data/lib/powertrack/rules/string_extension.rb +2 -2
- data/lib/powertrack/streaming/data_buffer.rb +1 -1
- data/lib/powertrack/streaming/retrier.rb +1 -1
- data/lib/powertrack/streaming/stream.rb +77 -41
- data/lib/powertrack/version.rb +1 -1
- data/test/minitest_helper.rb +4 -3
- data/test/test_manage_rules.rb +55 -12
- data/test/test_rule.rb +15 -15
- data/test/test_track_stream.rb +41 -11
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OTYwNTFiZWQwNTAwYzc2ZWJkYmIyNjA4YWNmNDYxYzMxNWY5ZmYzMA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MzJhNTVlNzMwMjIwNWEyNzEzZDY3YTEwYjU2ZWM1NmQ3NzE3OWE4ZQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZTljMmM0NWQzMzhiYzU3NjE1Y2ZmYzIxNTA1OWE5MTczYzc3NTE5MDExYzNi
|
10
|
+
MDJmODA5ZGQ2ODk0OGRlMzgyNDdhYjcwMGM5MjBkNDZhMzBmOWI0NjJjNzMx
|
11
|
+
ZmNmMmU3MmNjMjA2Y2RjZmE1OGVmM2FkZTUwOWI4MTQ0ZDBmNjk=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ODJlYjQ1OTU5NTY0YjdhZTczMTY0OWExZmFlNTk1OGI0YzBiY2ZiY2Y4Yzcy
|
14
|
+
NzQ4N2E0MGNmZDA4MTFmMzE0NzQyNDU0MGM5NTczNWMyZGEwMDQ1YzgyMjZk
|
15
|
+
MzcxYTUzNjcxZjZhY2YyZTc2YjcxYTBkZjZiNjQyNjEyZDMzZWM=
|
data/Gemfile.lock
CHANGED
data/History.txt
CHANGED
data/README.md
CHANGED
@@ -102,14 +102,14 @@ Backfill is a feature provided by GNIP to avoid losing activities when being
|
|
102
102
|
disconnected. It automatically resends the messages sent on the stream for the
|
103
103
|
last 5 minutes when reconnecting.
|
104
104
|
|
105
|
-
Provide a (numerical) client id
|
106
|
-
PowerTrack::Stream
|
105
|
+
Provide a (numerical) client id by setting the ```:client_id``` option when
|
106
|
+
building a ```PowerTrack::Stream``` object to enable this feature.
|
107
107
|
|
108
108
|
## Replay
|
109
109
|
|
110
110
|
Replay is a feature provided by GNIP to recover lost activities over the last
|
111
111
|
5 days. The Replay stream lives alongside the realtime stream and is activated
|
112
|
-
by setting the ```:replay``` option to true when building a ```PowerTrack::Stream```
|
112
|
+
by setting the ```:replay``` option to ```true``` when building a ```PowerTrack::Stream```
|
113
113
|
object.
|
114
114
|
|
115
115
|
Once Replay is activated, you use the stream as previously, starting by
|
@@ -128,6 +128,32 @@ replaying the same timeframe again and again when GNIP is unstable.
|
|
128
128
|
All the errors that come from PowerTrack are defined through an ad-hoc exception
|
129
129
|
class hierarchy. See ```lib/powertrack/errors.rb```.
|
130
130
|
|
131
|
+
## PowerTrack v2
|
132
|
+
|
133
|
+
The library provides early support for PowerTrack API version 2. Please read
|
134
|
+
[PowerTrack API v2](http://support.gnip.com/apis/powertrack2.0/index.html) and
|
135
|
+
the [Migration Guide](http://support.gnip.com/apis/powertrack2.0/transition.html)
|
136
|
+
for details about this new major release.
|
137
|
+
|
138
|
+
Set the ```:v2``` option to ```true``` when building a ```PowerTrack::Stream```
|
139
|
+
object to enable this feature. The library uses v1 by default.
|
140
|
+
|
141
|
+
Everything should work the same for v2 as for v1 except:
|
142
|
+
|
143
|
+
o ```PowerTrack::Stream.add_rule``` and ```PowerTrack::Stream.delete_rule```
|
144
|
+
return a status instead of nil
|
145
|
+
o The Backfill feature is configured by the ```:backfill_minutes``` option passed
|
146
|
+
to the ```PowerTrack::Stream.track``` method instead of passing a ```:client_id```
|
147
|
+
option to the ```PowerTrack::Stream``` initializer (which is simply ignored
|
148
|
+
when v2 is turned on). The new option specifies a number of minutes of backfill
|
149
|
+
data to receive.
|
150
|
+
o The Replay feature still uses v1 even if you explicitly turn v2 on. Support
|
151
|
+
for [Replay v2](http://support.gnip.com/apis/replay2.0/api_reference.html) is
|
152
|
+
planned but not scheduled yet.
|
153
|
+
|
154
|
+
Finally, PowerTrack v2 has a new endpoint for rule validation that is not
|
155
|
+
supported by this library yet.
|
156
|
+
|
131
157
|
## Credits
|
132
158
|
|
133
159
|
The ```powertrack``` gem heavily relies on *EventMachine* and the *em-http-request*
|
data/TODO.md
CHANGED
@@ -60,7 +60,7 @@ See [Data format](http://support.gnip.com/sources/twitter/data_format.html)
|
|
60
60
|
* _[DONE]_ Support Original output format
|
61
61
|
* _[DONE]_ Support Activity Stream output format
|
62
62
|
* _[DONE]_ Support raw format
|
63
|
-
|
63
|
+
|
64
64
|
* _[OUT]_ Manage retweets.
|
65
65
|
See [Identifying and Understanding retweets](http://support.gnip.com/articles/identifying-and-understanding-retweets.html)
|
66
66
|
|
@@ -71,8 +71,8 @@ See [Managing disconnections](http://support.gnip.com/articles/disconnections-ex
|
|
71
71
|
* _[DONE]_ Reconnect after disconnect. See
|
72
72
|
[Disconnections & Reconnecting](http://support.gnip.com/apis/consuming_streaming_data.html#Disconnections)
|
73
73
|
* _[DONE]_ Reconnect using an exponential backoff pattern.
|
74
|
-
* _[DONE]_ Support Backfill
|
75
|
-
* Support Replay
|
74
|
+
* _[DONE]_ Support Backfill (v1)
|
75
|
+
* _[DONE]_ Support Replay (v1)
|
76
76
|
* Reconnect when there's a GNIP server issue signaled by the 503 HTTP response status
|
77
77
|
|
78
78
|
## Other features
|
@@ -80,4 +80,24 @@ See [Managing disconnections](http://support.gnip.com/articles/disconnections-ex
|
|
80
80
|
* _[DONE]_ Support test and development streams
|
81
81
|
* _[DONE]_ Support Replay mode (5-days back history)
|
82
82
|
* Support status dashboard
|
83
|
-
* Support Historical
|
83
|
+
* Support Historical PowerTrack
|
84
|
+
|
85
|
+
## PowerTrack v2
|
86
|
+
See [Migration Guide](http://support.gnip.com/apis/powertrack2.0/transition.html)
|
87
|
+
and [PowerTrack API v2](http://support.gnip.com/apis/powertrack2.0/index.html).
|
88
|
+
|
89
|
+
* _[DONE]_ Support both v1 and v2 with the same interface/class
|
90
|
+
* _[DONE]_ Support new endpoint URLs
|
91
|
+
* Support rule validator
|
92
|
+
* Support new operators and quoted tweet filtering.
|
93
|
+
Double-check with tests that the gem does not prevent their usage
|
94
|
+
* _[DONE]_ Support new backfill behavior
|
95
|
+
* _[DONE]_ Support fixed backfill period used at first connection
|
96
|
+
* _[DONE]_ Support fixed backfill period used at each reconnect
|
97
|
+
* Support dynamic backfill period at each reconnect, calibrated according to
|
98
|
+
the number of minutes the stream was disconnected. Emit a warning if the
|
99
|
+
stream was disconnected more than 5 minutes (tweets were probably lost)
|
100
|
+
* _[DONE]_ Use HTTP POST verb (instead of DELETE) for rule deletions
|
101
|
+
* _[DONE]_ Fallback to v1 when Replay mode wants to use v2. Emit a warning.
|
102
|
+
* Support Replay v2
|
103
|
+
[Replay API 2.0 Reference](http://support.gnip.com/apis/replay2.0/api_reference.html)
|
@@ -19,17 +19,32 @@ module PowerTrack
|
|
19
19
|
# The maximum number of negative terms in a single rule value
|
20
20
|
MAX_NEGATIVE_TERMS = 50
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
# The default rule features
|
23
|
+
DEFAULT_RULE_FEATURES = {
|
24
|
+
# no id by default
|
25
|
+
id: nil,
|
26
|
+
# no tag by default
|
27
|
+
tag: nil,
|
28
|
+
# long determined by value length
|
29
|
+
long: nil
|
30
|
+
}.freeze
|
31
|
+
|
32
|
+
attr_reader :value, :id, :tag, :error
|
33
|
+
|
34
|
+
# Builds a new rule based on a value and some optional features
|
35
|
+
# (:id, :tag, :long).
|
36
|
+
#
|
25
37
|
# By default, the constructor assesses if it's a long rule or not
|
26
38
|
# based on the length of the value. But the 'long' feature can be
|
27
|
-
# explicitly specified with the
|
28
|
-
def initialize(value,
|
39
|
+
# explicitly specified with the :long feature.
|
40
|
+
def initialize(value, features=nil)
|
29
41
|
@value = value || ''
|
30
|
-
|
42
|
+
features = DEFAULT_RULE_FEATURES.merge(features || {})
|
43
|
+
@tag = features[:tag]
|
44
|
+
@id = features[:id]
|
31
45
|
# check if long is a boolean
|
32
|
-
|
46
|
+
_long = features[:long]
|
47
|
+
@long = _long == !!_long ? _long : @value.size > MAX_STD_RULE_VALUE_LENGTH
|
33
48
|
@error = nil
|
34
49
|
end
|
35
50
|
|
@@ -70,6 +85,7 @@ module PowerTrack
|
|
70
85
|
def to_hash
|
71
86
|
res = {:value => @value}
|
72
87
|
res[:tag] = @tag unless @tag.nil?
|
88
|
+
res[:id] = @id unless @id.nil?
|
73
89
|
res
|
74
90
|
end
|
75
91
|
|
@@ -3,7 +3,7 @@ require 'powertrack/rules/rule'
|
|
3
3
|
# Extend core String class with a rule transformer
|
4
4
|
class String
|
5
5
|
# Returns a PowerTrack::Rule instance based on the value of the string.
|
6
|
-
def to_pwtk_rule(
|
7
|
-
PowerTrack::Rule.new(self,
|
6
|
+
def to_pwtk_rule(features=nil)
|
7
|
+
PowerTrack::Rule.new(self, features)
|
8
8
|
end
|
9
9
|
end
|
@@ -22,7 +22,7 @@ module PowerTrack
|
|
22
22
|
max_elapsed_time: DEFAULT_MAX_ELAPSED_TIME,
|
23
23
|
multiplier: DEFAULT_INTERVAL_MULTIPLIER,
|
24
24
|
randomize_factor: DEFAULT_RANDOMIZE_FACTOR
|
25
|
-
}
|
25
|
+
}.freeze
|
26
26
|
|
27
27
|
# Builds a retrier that will retry a maximum retries number of times.
|
28
28
|
def initialize(max_retries, options=nil)
|
@@ -18,7 +18,12 @@ module PowerTrack
|
|
18
18
|
include VoidLogger::LoggerMixin
|
19
19
|
|
20
20
|
# The format of the URLs to connect to the various stream services
|
21
|
-
FEATURE_URL_FORMAT =
|
21
|
+
FEATURE_URL_FORMAT = {
|
22
|
+
# [ hostname, account, source, mode, label, feature ]
|
23
|
+
v1: "https://%s.gnip.com/accounts/%s/publishers/%s/%s/track/%s%s.json".freeze,
|
24
|
+
# [ hostname, feature, account, source, label, sub-feature ]
|
25
|
+
v2: "https://gnip-%s.twitter.com/%s/powertrack/accounts/%s/publishers/%s/%s%s.json".freeze
|
26
|
+
}.freeze
|
22
27
|
|
23
28
|
# The default timeout on a connection to PowerTrack. Can be overridden per call.
|
24
29
|
DEFAULT_CONNECTION_TIMEOUT = 30
|
@@ -29,24 +34,28 @@ module PowerTrack
|
|
29
34
|
|
30
35
|
# The default options for using the stream.
|
31
36
|
DEFAULT_STREAM_OPTIONS = {
|
37
|
+
# enable PowerTrack v2 API (using v1 by default)
|
38
|
+
v2: false,
|
39
|
+
# override the default connection timeout
|
32
40
|
connect_timeout: DEFAULT_CONNECTION_TIMEOUT,
|
41
|
+
# override the default inactivity timeout
|
33
42
|
inactivity_timeout: DEFAULT_INACTIVITY_TIMEOUT,
|
34
|
-
# use a client id if you want to leverage the Backfill feature
|
43
|
+
# use a client id if you want to leverage the Backfill feature in v1
|
35
44
|
client_id: nil,
|
36
45
|
# enable the replay mode to get activities over the last 5 days
|
37
46
|
# see http://support.gnip.com/apis/replay/api_reference.html
|
38
47
|
replay: false
|
39
|
-
}
|
48
|
+
}.freeze
|
40
49
|
|
41
50
|
DEFAULT_OK_RESPONSE_STATUS = 200
|
42
51
|
|
43
52
|
# The patterns used to identify the various types of message received from GNIP
|
44
53
|
# everything else is an activity
|
45
|
-
HEARTBEAT_MESSAGE_PATTERN = /\A\s*\z
|
46
|
-
SYSTEM_MESSAGE_PATTERN = /\A\s*\{\s*"(info|warn|error)":/mi
|
54
|
+
HEARTBEAT_MESSAGE_PATTERN = /\A\s*\z/.freeze
|
55
|
+
SYSTEM_MESSAGE_PATTERN = /\A\s*\{\s*"(info|warn|error)":/mi.freeze
|
47
56
|
|
48
57
|
# The format used to send UTC timestamps in Replay mode
|
49
|
-
REPLAY_TIMESTAMP_FORMAT = '%Y%m%d%H%M'
|
58
|
+
REPLAY_TIMESTAMP_FORMAT = '%Y%m%d%H%M'.freeze
|
50
59
|
|
51
60
|
attr_reader :username, :account_name, :data_source, :label
|
52
61
|
|
@@ -57,9 +66,12 @@ module PowerTrack
|
|
57
66
|
@data_source = data_source
|
58
67
|
@label = label
|
59
68
|
@options = DEFAULT_STREAM_OPTIONS.merge(options || {})
|
60
|
-
@client_id = @options[:client_id]
|
61
69
|
@replay = !!@options[:replay]
|
70
|
+
@client_id = @options[:client_id]
|
62
71
|
@stream_mode = @replay ? 'replay' : 'streams'
|
72
|
+
|
73
|
+
# force v1 if Replay activated
|
74
|
+
@v2 = !@replay && !!@options[:v2]
|
63
75
|
end
|
64
76
|
|
65
77
|
# Adds many rules to your PowerTrack stream’s ruleset.
|
@@ -69,7 +81,9 @@ module PowerTrack
|
|
69
81
|
# See http://support.gnip.com/apis/powertrack/api_reference.html#AddRules
|
70
82
|
def add_rules(*rules)
|
71
83
|
# flatten the rules in case it was provided as an array
|
72
|
-
make_rules_request(:post,
|
84
|
+
make_rules_request(:post,
|
85
|
+
body: MultiJson.encode('rules' => rules.flatten),
|
86
|
+
ok: 201)
|
73
87
|
end
|
74
88
|
|
75
89
|
# Removes the specified rules from the stream.
|
@@ -78,8 +92,14 @@ module PowerTrack
|
|
78
92
|
#
|
79
93
|
# See http://support.gnip.com/apis/powertrack/api_reference.html#DeleteRules
|
80
94
|
def delete_rules(*rules)
|
95
|
+
# v2 does not use DELETE anymore
|
96
|
+
delete_verb = @v2 ? :post : :delete
|
81
97
|
# flatten the rules in case it was provided as an array
|
82
|
-
|
98
|
+
delete_options = { body: MultiJson.encode('rules' => rules.flatten) }
|
99
|
+
# v2 uses a query parameter
|
100
|
+
delete_options[:query] = { '_method' => 'delete' } if @v2
|
101
|
+
|
102
|
+
make_rules_request(delete_verb, delete_options)
|
83
103
|
end
|
84
104
|
|
85
105
|
DEFAULT_LIST_RULES_OPTIONS = {
|
@@ -103,7 +123,9 @@ module PowerTrack
|
|
103
123
|
res.is_a?(Hash) &&
|
104
124
|
(rules = res['rules']).is_a?(Array) &&
|
105
125
|
rules.all? { |rule| rule.is_a?(Hash) && rule.key?('value') }
|
106
|
-
rules.map
|
126
|
+
rules.map do |rule|
|
127
|
+
PowerTrack::Rule.new(rule['value'], tag: rule['tag'], id: rule['id'])
|
128
|
+
end
|
107
129
|
else
|
108
130
|
res
|
109
131
|
end
|
@@ -125,6 +147,8 @@ module PowerTrack
|
|
125
147
|
from: nil,
|
126
148
|
# the ending date to which the activities will be recovered (replay mode only)
|
127
149
|
to: nil,
|
150
|
+
# specify a number of minutes to leverage the Backfill feature (v2 only)
|
151
|
+
backfill_minutes: nil,
|
128
152
|
# called for each message received, except heartbeats
|
129
153
|
on_message: nil,
|
130
154
|
# called for each activity received
|
@@ -140,6 +164,8 @@ module PowerTrack
|
|
140
164
|
# Establishes a persistent connection to the PowerTrack data stream,
|
141
165
|
# through which the social data will be delivered.
|
142
166
|
#
|
167
|
+
# Manages reconnections when being disconnected.
|
168
|
+
#
|
143
169
|
# <tt>GET /track/:stream</tt>
|
144
170
|
#
|
145
171
|
# See http://support.gnip.com/apis/powertrack/api_reference.html#Stream
|
@@ -151,30 +177,31 @@ module PowerTrack
|
|
151
177
|
|
152
178
|
private
|
153
179
|
|
154
|
-
# Returns the fully-qualified domain name of a GNIP PowerTrack server
|
155
|
-
# based on a hostname.
|
156
|
-
def gnip_server_name(hostname)
|
157
|
-
"%s.gnip.com" % [ hostname ]
|
158
|
-
end
|
159
|
-
|
160
|
-
# Returns the port used by GNIP PowerTrack servers.
|
161
|
-
def gnip_server_port
|
162
|
-
'443'
|
163
|
-
end
|
164
|
-
|
165
180
|
# Returns the URL of the stream for a given feature.
|
166
|
-
def feature_url(hostname, feature=nil)
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
181
|
+
def feature_url(hostname, feature=nil, sub_feature=nil)
|
182
|
+
_url = nil
|
183
|
+
if @v2
|
184
|
+
feature ||= hostname
|
185
|
+
sub_feature = sub_feature ? "/#{sub_feature}" : ''
|
186
|
+
_url = FEATURE_URL_FORMAT[:v2] %
|
187
|
+
[ hostname,
|
188
|
+
feature,
|
189
|
+
@account_name,
|
190
|
+
@data_source,
|
191
|
+
@label,
|
192
|
+
sub_feature ]
|
193
|
+
else
|
194
|
+
feature = feature ? "/#{feature}" : ''
|
195
|
+
_url = FEATURE_URL_FORMAT[:v1] %
|
196
|
+
[ hostname,
|
197
|
+
@account_name,
|
198
|
+
@data_source,
|
199
|
+
@stream_mode,
|
200
|
+
@label,
|
201
|
+
feature ]
|
202
|
+
|
203
|
+
_url += "?client=#{@client_id}" if @client_id
|
204
|
+
end
|
178
205
|
|
179
206
|
_url
|
180
207
|
end
|
@@ -198,8 +225,8 @@ module PowerTrack
|
|
198
225
|
end
|
199
226
|
|
200
227
|
# Opens a new connection to GNIP PowerTrack.
|
201
|
-
def connect(hostname, feature=nil)
|
202
|
-
url = feature_url(hostname, feature)
|
228
|
+
def connect(hostname, feature=nil, sub_feature=nil)
|
229
|
+
url = feature_url(hostname, feature, sub_feature)
|
203
230
|
logger.debug("Connecting to '#{url}' with headers #{connection_headers}...")
|
204
231
|
EventMachine::HttpRequest.new(url, connection_headers)
|
205
232
|
end
|
@@ -264,8 +291,9 @@ module PowerTrack
|
|
264
291
|
DEFAULT_RULES_REQUEST_OPTIONS = {
|
265
292
|
ok: DEFAULT_OK_RESPONSE_STATUS,
|
266
293
|
headers: {},
|
294
|
+
query: {},
|
267
295
|
body: nil
|
268
|
-
}
|
296
|
+
}.freeze
|
269
297
|
|
270
298
|
# Makes a rules-related request with a specific HTTP verb and a few options.
|
271
299
|
# Returns the response if successful or an exception if the request failed.
|
@@ -279,6 +307,7 @@ module PowerTrack
|
|
279
307
|
con = connect('api', 'rules')
|
280
308
|
http = con.setup_request(verb,
|
281
309
|
head: rules_req_headers.merge(options[:headers]),
|
310
|
+
query: options[:query],
|
282
311
|
body: options[:body])
|
283
312
|
|
284
313
|
http.errback do
|
@@ -315,10 +344,10 @@ module PowerTrack
|
|
315
344
|
.merge(gzip_compressed_header(compressed))
|
316
345
|
end
|
317
346
|
|
318
|
-
# Connects to the /track endpoint
|
319
|
-
# disconnected.
|
347
|
+
# Connects to the /track endpoint.
|
320
348
|
def track_once(options, retrier)
|
321
349
|
logger.info "Starting tracker for retry ##{retrier.retries}..."
|
350
|
+
backfill_minutes = options[:backfill_minutes]
|
322
351
|
stop_timeout = options[:stop_timeout]
|
323
352
|
on_heartbeat = options[:on_heartbeat]
|
324
353
|
on_message = options[:on_message]
|
@@ -336,7 +365,10 @@ module PowerTrack
|
|
336
365
|
EM.run do
|
337
366
|
logger.info "Starting the reactor..."
|
338
367
|
con = connect('stream')
|
339
|
-
get_opts = {
|
368
|
+
get_opts = {
|
369
|
+
head: track_req_headers(options[:compressed]),
|
370
|
+
query: {}
|
371
|
+
}
|
340
372
|
|
341
373
|
# add a timeframe in replay mode
|
342
374
|
if @replay
|
@@ -346,14 +378,18 @@ module PowerTrack
|
|
346
378
|
# stop 30 minutes ago by default
|
347
379
|
to = options[:to] || (now - 30*60)
|
348
380
|
|
349
|
-
get_opts[:query]
|
381
|
+
get_opts[:query].merge!({
|
350
382
|
'fromDate' => from.utc.strftime(REPLAY_TIMESTAMP_FORMAT),
|
351
383
|
'toDate' => to.utc.strftime(REPLAY_TIMESTAMP_FORMAT)
|
352
|
-
}
|
384
|
+
})
|
353
385
|
|
354
386
|
logger.info "Replay mode enabled from '#{from}' to '#{to}'"
|
355
387
|
end
|
356
388
|
|
389
|
+
if @v2 && backfill_minutes
|
390
|
+
get_opts[:query]['backfillMinutes'] = backfill_minutes
|
391
|
+
end
|
392
|
+
|
357
393
|
http = con.get(get_opts)
|
358
394
|
|
359
395
|
# polls to see if the connection should be closed
|
data/lib/powertrack/version.rb
CHANGED
data/test/minitest_helper.rb
CHANGED
@@ -30,13 +30,14 @@ class Minitest::Test
|
|
30
30
|
end
|
31
31
|
|
32
32
|
# Returns a brand-new stream based on the config found in test/powertrack.yml.
|
33
|
-
def new_stream(replay=false)
|
33
|
+
def new_stream(v2=false, replay=false)
|
34
34
|
PowerTrack::Stream.new(
|
35
35
|
powertrack_config[:username],
|
36
36
|
powertrack_config[:password],
|
37
37
|
powertrack_config[:account_name],
|
38
38
|
powertrack_config[:data_source],
|
39
|
-
replay ? 'prod' : powertrack_config[:stream_label],
|
40
|
-
replay: replay
|
39
|
+
replay ? 'prod' : (v2 ? 'prod2' : powertrack_config[:stream_label]),
|
40
|
+
replay: replay,
|
41
|
+
v2: v2)
|
41
42
|
end
|
42
43
|
end
|
data/test/test_manage_rules.rb
CHANGED
@@ -4,35 +4,78 @@ require 'multi_json'
|
|
4
4
|
|
5
5
|
class TestManageRules < Minitest::Test
|
6
6
|
|
7
|
-
def
|
8
|
-
add_then_delete_a_single_rule(false)
|
7
|
+
def test_add_then_delete_a_single_rule_v1
|
8
|
+
add_then_delete_a_single_rule(false, false)
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_add_then_delete_a_single_rule_v2
|
12
|
+
add_then_delete_a_single_rule(true, false)
|
9
13
|
end
|
10
14
|
|
11
15
|
def test_add_then_delete_a_single_rule_in_replay_mode
|
12
|
-
add_then_delete_a_single_rule(true)
|
16
|
+
add_then_delete_a_single_rule(false, true)
|
13
17
|
end
|
14
18
|
|
15
|
-
def add_then_delete_a_single_rule(replay)
|
16
|
-
stream = new_stream(replay)
|
19
|
+
def add_then_delete_a_single_rule(v2, replay)
|
20
|
+
stream = new_stream(v2, replay)
|
17
21
|
|
18
|
-
|
19
|
-
|
22
|
+
# add a logger
|
23
|
+
stream.logger = Logger.new(STDERR)
|
24
|
+
|
25
|
+
new_rule = PowerTrack::Rule.new('coke')
|
26
|
+
assert new_rule.valid?
|
20
27
|
|
21
28
|
pre_existing_rules = stream.list_rules
|
29
|
+
$stderr.puts pre_existing_rules.inspect
|
22
30
|
assert pre_existing_rules.is_a?(Array)
|
31
|
+
assert pre_existing_rules.all? { |rule| !rule.id.nil? } if v2
|
32
|
+
|
33
|
+
already_in = pre_existing_rules.any? { |rule| new_rule == rule }
|
23
34
|
|
24
|
-
|
35
|
+
res = stream.add_rule(new_rule)
|
25
36
|
|
26
|
-
|
37
|
+
if v2
|
38
|
+
assert res.is_a?(Hash)
|
39
|
+
assert res['summary'].is_a?(Hash)
|
40
|
+
|
41
|
+
if already_in
|
42
|
+
assert_equal 0, res['summary']['created']
|
43
|
+
assert_equal 1, res['summary']['not_created']
|
44
|
+
else
|
45
|
+
assert_equal 1, res['summary']['created']
|
46
|
+
assert_equal 0, res['summary']['not_created']
|
47
|
+
end
|
48
|
+
else
|
49
|
+
assert_nil res
|
50
|
+
end
|
51
|
+
|
52
|
+
rules_after_addition = stream.list_rules
|
27
53
|
assert rules_after_addition.is_a?(Array)
|
28
|
-
|
29
|
-
|
54
|
+
assert rules_after_addition.all? { |rule| !rule.id.nil? } if v2
|
55
|
+
|
56
|
+
if already_in
|
57
|
+
assert_equal pre_existing_rules.size, rules_after_addition.size
|
58
|
+
assert [], rules_after_addition - pre_existing_rules
|
59
|
+
else
|
60
|
+
assert_equal pre_existing_rules.size + 1, rules_after_addition.size
|
61
|
+
assert [ new_rule ], rules_after_addition - pre_existing_rules
|
62
|
+
end
|
63
|
+
|
64
|
+
res = stream.delete_rules(new_rule)
|
30
65
|
|
31
|
-
|
66
|
+
if v2
|
67
|
+
assert res.is_a?(Hash)
|
68
|
+
assert res['summary'].is_a?(Hash)
|
69
|
+
assert_equal 1, res['summary']['deleted']
|
70
|
+
assert_equal 0, res['summary']['not_deleted']
|
71
|
+
else
|
72
|
+
assert_nil res
|
73
|
+
end
|
32
74
|
|
33
75
|
rules_after_removal = stream.list_rules
|
34
76
|
assert rules_after_removal.is_a?(Array)
|
35
77
|
assert_equal rules_after_addition.size - 1, rules_after_removal.size
|
36
78
|
assert_equal [], rules_after_removal - rules_after_addition
|
79
|
+
assert rules_after_removal.all? { |rule| !rule.id.nil? } if v2
|
37
80
|
end
|
38
81
|
end
|
data/test/test_rule.rb
CHANGED
@@ -12,7 +12,7 @@ class TestRule < Minitest::Test
|
|
12
12
|
assert rule.valid?
|
13
13
|
assert_nil rule.error
|
14
14
|
|
15
|
-
rule = PowerTrack::Rule.new('pepsi', 'soda', true)
|
15
|
+
rule = PowerTrack::Rule.new('pepsi', tag: 'soda', long: true)
|
16
16
|
assert_equal 'pepsi', rule.value
|
17
17
|
assert_equal 'soda', rule.tag
|
18
18
|
assert rule.long?
|
@@ -22,12 +22,12 @@ class TestRule < Minitest::Test
|
|
22
22
|
|
23
23
|
def test_too_long_tag
|
24
24
|
long_tag = 'a' * PowerTrack::Rule::MAX_TAG_LENGTH
|
25
|
-
rule = PowerTrack::Rule.new('coke', long_tag, false)
|
25
|
+
rule = PowerTrack::Rule.new('coke', tag: long_tag, long: false)
|
26
26
|
assert rule.valid?
|
27
27
|
assert_nil rule.error
|
28
28
|
|
29
29
|
long_tag = 'b' * 2 * PowerTrack::Rule::MAX_TAG_LENGTH
|
30
|
-
rule = PowerTrack::Rule.new('coke', long_tag, true)
|
30
|
+
rule = PowerTrack::Rule.new('coke', tag: long_tag, long: true)
|
31
31
|
assert !rule.valid?
|
32
32
|
assert_match /too long tag/i, rule.error
|
33
33
|
end
|
@@ -38,13 +38,13 @@ class TestRule < Minitest::Test
|
|
38
38
|
assert rule.valid?
|
39
39
|
|
40
40
|
long_val = 'c' * PowerTrack::Rule::MAX_LONG_RULE_VALUE_LENGTH
|
41
|
-
rule = long_val.to_pwtk_rule(
|
41
|
+
rule = long_val.to_pwtk_rule(long: false)
|
42
42
|
|
43
43
|
assert !rule.valid?
|
44
44
|
assert_match /too long value/i, rule.error
|
45
45
|
|
46
46
|
assert long_val.to_pwtk_rule.valid?
|
47
|
-
assert long_val.to_pwtk_rule(
|
47
|
+
assert long_val.to_pwtk_rule(long: true).valid?
|
48
48
|
|
49
49
|
very_long_val = 'rrr' * PowerTrack::Rule::MAX_LONG_RULE_VALUE_LENGTH
|
50
50
|
rule = very_long_val.to_pwtk_rule
|
@@ -59,18 +59,18 @@ class TestRule < Minitest::Test
|
|
59
59
|
assert rule.valid?
|
60
60
|
assert_nil rule.error
|
61
61
|
|
62
|
-
long_rule = PowerTrack::Rule.new(phrase,
|
62
|
+
long_rule = PowerTrack::Rule.new(phrase, long: true)
|
63
63
|
assert long_rule.long?
|
64
64
|
assert long_rule.valid?
|
65
65
|
assert_nil long_rule.error
|
66
66
|
|
67
67
|
phrase = ([ 'coke' ] * (2 * PowerTrack::Rule::MAX_POSITIVE_TERMS)).join(' ')
|
68
|
-
rule = PowerTrack::Rule.new(phrase,
|
68
|
+
rule = PowerTrack::Rule.new(phrase, long: false)
|
69
69
|
assert !rule.long?
|
70
70
|
assert !rule.valid?
|
71
71
|
assert_match /too many positive terms/i, rule.error
|
72
72
|
|
73
|
-
long_rule = PowerTrack::Rule.new(phrase,
|
73
|
+
long_rule = PowerTrack::Rule.new(phrase, long: true)
|
74
74
|
assert long_rule.long?
|
75
75
|
assert long_rule.valid?
|
76
76
|
assert_nil long_rule.error
|
@@ -93,7 +93,7 @@ class TestRule < Minitest::Test
|
|
93
93
|
assert rule.valid?
|
94
94
|
assert_nil rule.error
|
95
95
|
|
96
|
-
long_rule = PowerTrack::Rule.new(phrase,
|
96
|
+
long_rule = PowerTrack::Rule.new(phrase, long: true)
|
97
97
|
assert long_rule.long?
|
98
98
|
assert long_rule.valid?
|
99
99
|
assert_nil long_rule.error
|
@@ -104,7 +104,7 @@ class TestRule < Minitest::Test
|
|
104
104
|
assert !rule.valid?
|
105
105
|
assert_match /too many negative terms/i, rule.error
|
106
106
|
|
107
|
-
long_rule = PowerTrack::Rule.new(phrase,
|
107
|
+
long_rule = PowerTrack::Rule.new(phrase, long: true)
|
108
108
|
assert long_rule.long?
|
109
109
|
assert long_rule.valid?
|
110
110
|
assert_nil long_rule.error
|
@@ -125,7 +125,7 @@ class TestRule < Minitest::Test
|
|
125
125
|
assert_equal MultiJson.encode(res), rule.to_json
|
126
126
|
|
127
127
|
res[:tag] = 'soda'
|
128
|
-
rule = PowerTrack::Rule.new(res[:value], res[:tag], true)
|
128
|
+
rule = PowerTrack::Rule.new(res[:value], tag: res[:tag], long: true)
|
129
129
|
assert_equal res, rule.to_hash
|
130
130
|
assert_equal MultiJson.encode(res), rule.to_json
|
131
131
|
end
|
@@ -141,9 +141,9 @@ class TestRule < Minitest::Test
|
|
141
141
|
|
142
142
|
def test_hash
|
143
143
|
short_rule = PowerTrack::Rule.new('coke')
|
144
|
-
not_long_rule = PowerTrack::Rule.new('coke',
|
145
|
-
false_long_rule = PowerTrack::Rule.new('coke',
|
146
|
-
short_rule_with_tag = PowerTrack::Rule.new('coke', 'soda')
|
144
|
+
not_long_rule = PowerTrack::Rule.new('coke', long: false)
|
145
|
+
false_long_rule = PowerTrack::Rule.new('coke', long: true)
|
146
|
+
short_rule_with_tag = PowerTrack::Rule.new('coke', tag: 'soda')
|
147
147
|
|
148
148
|
assert short_rule == not_long_rule
|
149
149
|
assert_equal short_rule, not_long_rule
|
@@ -158,6 +158,6 @@ class TestRule < Minitest::Test
|
|
158
158
|
assert_equal 2, h[short_rule]
|
159
159
|
assert_equal h[short_rule], h[not_long_rule]
|
160
160
|
assert_equal 4, h[short_rule_with_tag]
|
161
|
-
assert_nil h[PowerTrack::Rule.new('pepsi', 'soda')]
|
161
|
+
assert_nil h[PowerTrack::Rule.new('pepsi', tag: 'soda')]
|
162
162
|
end
|
163
163
|
end
|
data/test/test_track_stream.rb
CHANGED
@@ -4,28 +4,46 @@ require 'multi_json'
|
|
4
4
|
|
5
5
|
class TestTrackStream < Minitest::Test
|
6
6
|
|
7
|
-
def
|
8
|
-
track_simple_stream(false)
|
7
|
+
def test_track_realtime_stream_v1
|
8
|
+
track_simple_stream(false, false)
|
9
9
|
end
|
10
10
|
|
11
|
-
def
|
12
|
-
track_simple_stream(true)
|
11
|
+
def test_track_realtime_stream_v2
|
12
|
+
track_simple_stream(true, false)
|
13
13
|
end
|
14
14
|
|
15
|
-
def
|
16
|
-
|
15
|
+
def test_track_replay_stream_v1
|
16
|
+
track_simple_stream(false, true)
|
17
|
+
end
|
18
|
+
|
19
|
+
# def test_track_replay_stream_v2
|
20
|
+
# track_simple_stream(true, true)
|
21
|
+
# end
|
22
|
+
|
23
|
+
def track_simple_stream(v2, replay)
|
24
|
+
stream = new_stream(v2, replay)
|
17
25
|
|
18
26
|
# add a logger
|
19
27
|
stream.logger = Logger.new(STDERR)
|
20
28
|
|
21
|
-
|
22
|
-
assert
|
29
|
+
new_rule = PowerTrack::Rule.new('ny OR nyc OR #nyc OR new york')
|
30
|
+
assert new_rule.valid?
|
23
31
|
|
24
32
|
begin
|
25
|
-
|
33
|
+
res = stream.add_rule(new_rule)
|
34
|
+
|
35
|
+
if v2
|
36
|
+
assert res.is_a?(Hash)
|
37
|
+
assert res['summary'].is_a?(Hash)
|
38
|
+
else
|
39
|
+
assert_nil res
|
40
|
+
end
|
41
|
+
|
26
42
|
rules_after_addition = stream.list_rules
|
27
43
|
assert rules_after_addition.is_a?(Array)
|
28
44
|
assert rules_after_addition.size > 0
|
45
|
+
assert rules_after_addition.any? { |rule| rule == new_rule }
|
46
|
+
assert rules_after_addition.all? { |rule| !rule.id.nil? } if v2
|
29
47
|
|
30
48
|
heartbeats = 0
|
31
49
|
received = 0
|
@@ -87,7 +105,8 @@ class TestTrackStream < Minitest::Test
|
|
87
105
|
assert (ended_at - started_at) >= delay
|
88
106
|
end
|
89
107
|
|
90
|
-
|
108
|
+
# heartbeats only sent every 10 minutes in v2...
|
109
|
+
assert heartbeats > 0, 'No heartbeat received' unless v2
|
91
110
|
puts "#{heartbeats} heartbeats received"
|
92
111
|
|
93
112
|
assert received > 0, 'No message received so far'
|
@@ -95,8 +114,19 @@ class TestTrackStream < Minitest::Test
|
|
95
114
|
|
96
115
|
assert tweeted > 0, 'No tweet received so far'
|
97
116
|
puts "#{tweeted} tweets received"
|
117
|
+
rescue
|
118
|
+
p $!
|
98
119
|
ensure
|
99
|
-
|
120
|
+
res = stream.delete_rules(new_rule)
|
121
|
+
|
122
|
+
if v2
|
123
|
+
assert res.is_a?(Hash)
|
124
|
+
assert res['summary'].is_a?(Hash)
|
125
|
+
assert_equal 1, res['summary']['deleted']
|
126
|
+
assert_equal 0, res['summary']['not_deleted']
|
127
|
+
else
|
128
|
+
assert_nil res
|
129
|
+
end
|
100
130
|
end
|
101
131
|
end
|
102
132
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: powertrack
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Farcy
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2016-
|
13
|
+
date: 2016-08-10 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: bundler
|