webwatchr 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +13 -0
- data/LICENSE +674 -0
- data/README.md +122 -0
- data/Rakefile +11 -0
- data/lib/sites/bandcamp.rb +60 -0
- data/lib/sites/bsky.rb +176 -0
- data/lib/sites/postch.rb +112 -0
- data/lib/sites/songkick.rb +28 -0
- data/lib/webwatchr/alerting.rb +129 -0
- data/lib/webwatchr/base.rb +119 -0
- data/lib/webwatchr/logger.rb +31 -0
- data/lib/webwatchr/main.rb +51 -0
- data/lib/webwatchr/site.rb +575 -0
- data/lib/webwatchr.rb +1 -0
- data/tests/helpers.rb +32 -0
- data/tests/infra_test.rb +271 -0
- data/webwatchr.gemspec +12 -0
- metadata +56 -0
|
@@ -0,0 +1,575 @@
|
|
|
1
|
+
require "digest/md5"
|
|
2
|
+
require "fileutils"
|
|
3
|
+
require "json"
|
|
4
|
+
require "logger"
|
|
5
|
+
require "net/http"
|
|
6
|
+
require "nokogiri"
|
|
7
|
+
require_relative "./logger"
|
|
8
|
+
|
|
9
|
+
# Base class for everything Webwatchr watches. Subclasses implement
# get_content()/get_new() and are configured through the tiny DSL provided
# by #set and #method_missing below.
class Site
  include Loggable

  # Raised by site parsers when a fetched page cannot be parsed.
  class ParseError < StandardError
  end

  # Raised by fetch_url1 when the redirect budget (max_redir) is exhausted.
  class RedirectError < StandardError
  end

  # Prefix for every generated HTML alert body.
  HTML_HEADER = "<!DOCTYPE html>\n<meta charset=\"utf-8\">\n".freeze
  # Browser-like default UA; some sites refuse obvious bot user agents.
  DEFAULT_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'.freeze

  attr_accessor :url, :alerters, :rand_sleep, :every, :lastdir, :cache_dir, :state_file, :comment

  # NOTE(review): @name can be written here, but the #name reader below
  # returns @url.dup and ignores @name — confirm which is intended.
  attr_writer :name
|
|
23
|
+
|
|
24
|
+
# Explicit DSL setter: assigns the instance variable @<name> to +value+
# and returns self so calls can be chained.
def set(name, value)
  ivar = "@#{name}"
  instance_variable_set(ivar, value)
  self
end
|
|
28
|
+
|
|
29
|
+
# Human-readable identifier for this site.
#
# Fixed: the class declares `attr_writer :name`, but this reader previously
# returned @url.dup unconditionally, silently discarding any name a user
# assigned. Now an explicitly set @name wins, with the URL as fallback.
def name
  (@name || @url).dup
end
|
|
32
|
+
|
|
33
|
+
# Builder entry point for the configuration DSL:
#
#   Site.create { url "https://example.com"; every 600 }
#
# Fixed: the old implementation returned `new.instance_eval(&block)`, i.e.
# whatever the block's last expression happened to be. That only worked
# because the DSL setters return self; a block ending in any other
# expression leaked that value to the caller. Always return the Site.
def self.create(&block)
  site = new
  site.instance_eval(&block) if block
  site
end
|
|
40
|
+
|
|
41
|
+
# DSL accessor of last resort: `foo` reads @foo, `foo(value)` writes @foo
# and returns self so setters can be chained inside Site.create blocks.
def method_missing(attr, *args)
  if args.empty?
    instance_variable_get("@#{attr}")
  else
    instance_variable_set("@#{attr}", *args)
    self
  end
end

# Pairs with method_missing (previously suppressed via a rubocop:disable):
# every message is handled above, so every message is "responded to".
def respond_to_missing?(_name, _include_private = false)
  true
end
|
|
49
|
+
|
|
50
|
+
# Establishes conservative defaults; the DSL and subclasses override them.
def initialize()
  # Plain HTTP/1.x via Net::HTTP unless the site demands HTTP/2 (curb).
  @http_ver = 1
  # Pretend to be a browser by default; some sites block obvious bots.
  @useragent = Site::DEFAULT_USER_AGENT
  @extra_headers = {}
  # No alerters configured and no per-alerter filtering yet.
  @alerters = []
  @alert_only = []
  # No jitter before fetching, check at most once an hour.
  @rand_sleep = 0
  @every = 3600
  # Flips to true once do_stuff() has pulled data; guards #content.
  @did_stuff = false
end
|
|
60
|
+
|
|
61
|
+
# Registers an extra HTTP request header sent with every fetch.
def set_http_header(key, value)
  @extra_headers[key] = value
end

# Dispatches to the HTTP/2 (curb) or HTTP/1.x (Net::HTTP) fetcher
# depending on the configured @http_ver.
def fetch_url(url, max_redir: 10)
  return fetch_url2(url) if @http_ver == 2

  fetch_url1(url, max_redir: max_redir)
end
|
|
72
|
+
|
|
73
|
+
# Helper methonds for generating HTML emails
|
|
74
|
+
|
|
75
|
+
# URL shown in alert e-mails; subclasses may override to link elsewhere.
def get_email_url()
  @url
end

# Subject line for alert e-mails, tagged with the optional user comment.
def get_email_subject()
  base = "Update from #{self.class}"
  @comment ? "#{base} (#{@comment})" : base
end
|
|
86
|
+
|
|
87
|
+
# Default HTML e-mail body: the standard header followed by the raw
# content, or nil when nothing has been pulled.
def generate_html_content()
  return nil unless @content

  Site::HTML_HEADER.dup + @content
end

# Default Telegram rendering: one message piece holding the content.
def generate_telegram_message_pieces()
  [@content]
end
|
|
99
|
+
|
|
100
|
+
# HTTP/2 fetch via curb/libcurl. POSTs @post_data when present, otherwise
# GETs; applies @useragent and @extra_headers. Returns the response body.
def fetch_url2(url)
  require "curb" # lazy: curb is only needed for sites configured with http_ver 2

  if @post_data
    cmethod = Curl::Easy.method(:http_post)
    params = [url, @post_data]
  else
    cmethod = Curl::Easy.method(:new)
    params = [url]
  end

  c = cmethod.call(*params) do |curl|
    curl.set(:HTTP_VERSION, Curl::HTTP_2_0)
    if @useragent
      curl.headers['User-Agent'] = @useragent
    end
    # Fixed: verbose was unconditionally true, dumping libcurl wire traces
    # on every fetch. Gate it on $VERBOSE like fetch_url1's debug output.
    curl.verbose = true if $VERBOSE
    @extra_headers.each do |k, v|
      curl.headers[k] = v
    end
  end

  c.perform
  c.body_str
end
|
|
125
|
+
|
|
126
|
+
# HTTP/1.x fetch via Net::HTTP. Sends @post_data as a form POST, @post_json
# as a JSON POST, otherwise a plain GET, applying @useragent and
# @extra_headers. Follows 301/302 redirects and zero-delay HTML
# meta-refresh redirects up to max_redir hops, then raises
# Site::RedirectError. Returns the (UTF-8 sanitized) body.
def fetch_url1(url, max_redir: 10)
  html = ""
  uri = URI(url)
  req = nil
  http_o = Net::HTTP.new(uri.host, uri.port)
  http_o.use_ssl = (uri.scheme == 'https')
  http_o.set_debug_output $stderr if $VERBOSE
  http_o.start do |http|
    if @post_data
      req = Net::HTTP::Post.new(uri)
      req.set_form_data(@post_data)
    elsif @post_json
      req = Net::HTTP::Post.new(uri, 'Content-Type' => 'application/json')
      req.body = if @post_json.instance_of?(String)
                   @post_json
                 else
                   @post_json.to_json
                 end

    else
      req = Net::HTTP::Get.new(uri)
    end
    if @useragent
      req["User-Agent"] = @useragent
    end
    @extra_headers.each do |k, v|
      req[k] = v
    end
    response = http.request(req)
    case response.code
    when "301", "302"
      if max_redir == 0
        raise Site::RedirectError
      end

      location = response["Location"]
      unless location.start_with?("http")
        # Relative redirect target: rebuild an absolute URL on this host.
        location = if location.start_with?("/")
                     "#{uri.scheme}://#{uri.hostname}:#{uri.port}#{location}"
                   else
                     "#{uri.scheme}://#{uri.hostname}:#{uri.port}/#{location}"
                   end
      end

      logger.debug "Redirecting to #{location}"
      return fetch_url(location, max_redir: max_redir - 1)
    end

    html = response.body

    if html && (html =~ /meta http-equiv="refresh" content="0;URL='(.*)'/)
      if max_redir == 0
        raise Site::RedirectError
      end

      url = "#{uri.scheme}://#{uri.hostname}:#{uri.port}#{::Regexp.last_match(1)}"
      # Fixed: this branch previously logged `location`, which is never
      # assigned on the meta-refresh path, yielding "Redirecting to " with
      # no target. Log the rebuilt url instead.
      logger.debug "Redirecting to #{url}"
      return fetch_url(url, max_redir: max_redir - 1)
    end

    html = if html and response["Content-Encoding"]
             # NOTE(review): Content-Encoding names a compression scheme
             # (gzip/deflate), not a charset, so force_encoding would raise
             # for those values. Net::HTTP normally decompresses and strips
             # the header, which is likely why this rarely triggers —
             # confirm whether the Content-Type charset was intended.
             html.force_encoding(response["Content-Encoding"])
           else
             html.encode("UTF-8", "binary", invalid: :replace, undef: :replace, replace: "")
           end
  end
  logger.debug "Fetched #{url}"
  return html
end
|
|
195
|
+
|
|
196
|
+
# Default content parser: delegate to Nokogiri.
def parse_content(html)
  parse_noko(html)
end

# Parses the page once to discover a <meta charset=...> declaration, forces
# the raw string to that charset when found, then re-parses and returns the
# resulting Nokogiri document.
def parse_noko(html)
  probe = Nokogiri::HTML(html)
  probe.css("meta").each do |meta_tag|
    declared = meta_tag['charset']
    html = html.force_encoding(declared) if declared
  end
  Nokogiri::HTML(html)
end
|
|
210
|
+
|
|
211
|
+
# Reads the per-site JSON state file. Returns {} when no state file is
# configured, the file is missing, or its contents are corrupt.
# create_additions lets serialized ResultObjects revive themselves.
def load_state_file()
  return {} unless @state_file && File.exist?(@state_file)

  begin
    JSON.parse(File.read(@state_file), create_additions: true)
  rescue JSON::ParserError
    # Corrupt state is treated the same as no state at all.
    {}
  end
end

# Persists the given hash to the state file as pretty-printed JSON.
def save_state_file(hash)
  File.open(@state_file, "w") do |f|
    f.write JSON.pretty_generate(hash)
  end
end
|
|
226
|
+
|
|
227
|
+
# Merges +hash+ into the stored state, stamping the current time, URL and
# wait interval, then writes everything back to the state file.
def update_state_file(hash)
  merged = load_state_file()
  merged["time"] = Time.now.to_i
  merged["url"] = @url
  merged["wait"] = @wait
  merged.update(hash)
  save_state_file(merged)
end
|
|
237
|
+
|
|
238
|
+
# Fires every configured alerter for this site's new content.
#
# Fixed: the @alert_only filter was inverted — alerters whose IDENTIFIER
# was listed in @alert_only were the ones being *skipped*, turning the
# "only alert via these channels" option into a blocklist. Now an empty
# @alert_only means "alert everywhere", and a non-empty list restricts
# alerting to the listed identifiers, matching the #alert_only setter.
def alert()
  logger.debug "Alerting new stuff"
  @alerters.each do |alerter|
    next unless @alert_only.empty? || @alert_only.include?(alerter.class::IDENTIFIER)

    alerter.alert(self)
  end
end
|
|
244
|
+
|
|
245
|
+
# Accessor used by alerters; refuses to hand out content before do_stuff()
# has actually pulled data (guarded by @did_stuff).
def content()
  unless @did_stuff
    raise StandardError, 'Trying to access @content, but we have not pulled any data yet'
  end

  @content
end

# Raw fetched page; subclasses typically override to extract what they need.
def get_content()
  @html_content
end
|
|
256
|
+
|
|
257
|
+
# Restricts alerting to the given alerter identifier(s).
#
# Accepts a single Symbol or an Array of Symbols. Fixed: the Array branch
# called `instance_of(Array)` (missing the `?`), so passing an Array raised
# NoMethodError instead of being accepted.
def alert_only(alerter_identifiers)
  case alerter_identifiers
  when Symbol
    @alert_only = [alerter_identifiers]
  when Array
    @alert_only = alerter_identifiers
  else
    raise StandardError, "unknown type of provided alerter identifier #{alerter_identifiers}"
  end
end
|
|
266
|
+
|
|
267
|
+
# True when at least @wait seconds have elapsed since the last update.
def should_update?(previous_time)
  due_at = previous_time + @wait
  Time.now.to_i >= due_at
end

# Default "what changed" hook: treats everything currently fetchable as
# new. Subclasses override this to diff against previous_content.
def get_new(_previous_content = nil)
  @content = get_content()
  @content
end
|
|
275
|
+
|
|
276
|
+
# Main per-run entry point for a site.
#
# Derives per-site cache/state file paths from the URL's hostname + MD5,
# loads previous state, then delegates to do_stuff(). Transient failures
# (redirect loops, parse errors, network errors) are logged and push the
# next attempt back by an extra 30 minutes via the state file instead of
# aborting the whole run.
#
# cache_dir: base directory for cached fetches
# last_dir:  base directory for per-site state files
# test:      when true, do_stuff() only reports what it would do
def update(cache_dir:, last_dir:, test: false)
  raise StandardError, "Didn't set URL for site #{self}" unless @url

  md5 = Digest::MD5.hexdigest(@url)
  @cache_dir = File.join(cache_dir, "cache-#{URI.parse(@url).hostname}-#{md5}")
  @state_file = File.join(last_dir, "last-#{URI.parse(@url).hostname}-#{md5}")
  state = load_state_file()
  # NOTE(review): @every is initialized to 3600 in #initialize, so the
  # state-file "wait" fallback and the 60 * 60 literal appear unreachable
  # unless @every was explicitly set to nil — confirm intended precedence.
  @wait = @every || state["wait"] || 60 * 60
  @test = test
  logger.debug "using #{@state_file} to store updates, and #{@cache_dir} for Cache"

  do_stuff()
rescue Site::RedirectError
  msg = "Error parsing page #{@url}, too many redirects"
  msg += ". Will retry in #{@wait} + 30 minutes"
  logger.error msg
  warn msg
  # Back off: persist an increased wait so the next run delays this site.
  update_state_file({ "wait" => @wait + 30 * 60 })
rescue Site::ParseError => e
  msg = "Error parsing page #{@url}"
  if e.message
    msg += " with error : #{e.message}"
  end
  msg += ". Will retry in #{@wait} + 30 minutes"
  logger.error msg
  warn msg
  update_state_file({ "wait" => @wait + 30 * 60 })
rescue Errno::ECONNREFUSED, Net::ReadTimeout, OpenSSL::SSL::SSLError, Net::OpenTimeout => e
  msg = "Network error on #{@url}"
  if e.message
    msg += " : #{e.message}"
  end
  msg += ". Will retry in #{@wait} + 30 minutes"
  logger.error msg
  warn msg
  update_state_file({ "wait" => @wait + 30 * 60 })
end
|
|
313
|
+
|
|
314
|
+
# Fetches the configured URL and stores both the raw body and its parsed
# (Nokogiri) form for subclasses to consume.
def pull_things()
  raw = fetch_url(@url)
  @html_content = raw
  @parsed_content = parse_content(raw)
end
|
|
318
|
+
|
|
319
|
+
# Core polling cycle: decide whether the site is due, fetch + diff content,
# fire alerters on change, and persist state. In @test mode nothing is
# alerted or persisted; intended actions are logged instead. Sets
# @did_stuff so #content knows data has been pulled.
def do_stuff()
  new_stuff = false
  # Sentinel time far in the past so a site with no saved state is always
  # considered due for its first update.
  previous_state = {
    "time" => -9_999_999_999_999,
    "content" => nil
  }
  state = load_state_file()
  if state
    previous_state.update(state)
  end
  previous_content = previous_state["content"]
  if should_update?(previous_state["time"]) or @test
    if @rand_sleep > 0 and not @test
      # Random jitter so many sites aren't hammered at the exact same time.
      logger.info "Time to update #{@url} (sleeping #{@rand_sleep} sec)"
      sleep(@rand_sleep)
    else
      logger.info "Time to update #{@url}"
    end
    pull_things()
    new_stuff = get_new(previous_content)
    @did_stuff = true
    if new_stuff
      if @test
        logger.info "Would have alerted with new stuff:\n#{new_stuff}"
      else
        alert()
        update_state_file({
          "content" => new_stuff,
          "previous_content" => previous_content
        })
      end
    else
      logger.info "Nothing new for #{@url}"
      if @test
        logger.info "Current state is still :\n#{@content}"
      end
    end
    # Always refresh "time" (and url/wait) so the next run waits @wait secs.
    update_state_file({}) unless @test
  else
    # Nothing fetched, but mark the cycle done so #content doesn't raise.
    @did_stuff = true
    logger.info "Too soon to update #{@url}"
  end
end
|
|
362
|
+
|
|
363
|
+
class SimpleString < Site
|
|
364
|
+
# Wrapper around a SimpleString site's scraped value. Knows how to render
# itself for each alerting channel and how to round-trip through the JSON
# state file via JSON's create_additions mechanism.
class ResultObject
  attr_accessor :message

  def initialize(message = '')
    @message = message
  end

  # Rendering for the Telegram alerter.
  def to_telegram()
    return @message
  end

  def to_s
    return @message
  end

  # Rendering for the HTML e-mail alerter.
  def to_html()
    return @message
  end

  # Serializes with JSON.create_id so load_state_file(create_additions:
  # true) can revive the object on the next run.
  def to_json(*args)
    {
      JSON.create_id => self.class.name,
      'message' => @message
    }.to_json(*args)
  end

  # Inverse of #to_json. Fixed: the stored message was splatted
  # (`new(*object['message'])`), which only worked for Strings through
  # Ruby's splat coercion and broke for any other message type. Pass it
  # through directly.
  def self.json_create(object)
    new(object['message'])
  end

  # State-file comparison: equal when class and message both match.
  def ==(other)
    self.class == other.class &&
      @message == other.message
  end
end
|
|
399
|
+
|
|
400
|
+
# Returns the current content when it differs from previous_content, nil
# otherwise (nil means "nothing to alert").
#
# Fixed: the contract check used `@content.class < ResultObject`, which is
# Ruby's *strict* subclass test — false for direct ResultObject instances,
# so perfectly valid content raised. Use is_a?, matching the check already
# used in generate_html_content.
def get_new(previous_content = nil)
  if @content
    raise StandardError, "The result of get_content() should be a ResultObject if the Site class is SimpleString" unless @content.is_a?(ResultObject)
  else
    @content = get_content()
  end
  return nil if @content == previous_content

  @content
end
|
|
411
|
+
|
|
412
|
+
# HTML e-mail body for SimpleString sites: the standard header plus the
# content's HTML rendering (ResultObject) or the content itself (String).
def generate_html_content()
  return nil unless @content

  body = @content.is_a?(ResultObject) ? @content.to_html : @content
  Site::HTML_HEADER.dup + body
end

# Single Telegram message piece: the content's Telegram rendering when it
# is a ResultObject, otherwise the raw content.
def generate_telegram_message_pieces()
  piece = @content.is_a?(ResultObject) ? @content.to_telegram : @content
  [piece]
end
|
|
427
|
+
end
|
|
428
|
+
|
|
429
|
+
# SimpleString variant that alerts with a textual diff between the
# previous and current content instead of the full new value.
#
# The diff backend is chosen at class-definition time: the "diffy" gem's
# HTML diff (with its CSS) when available, otherwise test/unit's plain
# unified diff as a fallback.
class DiffString < SimpleString
  begin
    require "diffy"

    # HTML page embedding Diffy's CSS and the rendered HTML diff.
    def generate_html_content()
      diff_html = Site::HTML_HEADER.dup
      diff_html += "<head><style>"
      diff_html += Diffy::CSS
      diff_html += "</style><body>"
      diff_html += @diffed.to_s(:html)
      diff_html += "</body></html>"
      return diff_html
    end

    # Diff object comparing previous vs new; rendered via to_s / to_s(:html).
    def get_differ(previous, new)
      return Diffy::Diff.new(previous, new)
    end
  rescue LoadError
    require "test/unit/diff"

    # Fallback: plain unified diff wrapped in the standard HTML header.
    # NOTE(review): emits a closing </body></html> without matching opening
    # tags — confirm whether that matters for the mail clients in use.
    def generate_html_content()
      diff_html = Site::HTML_HEADER.dup
      diff_html += @diffed.to_s
      diff_html += "</body></html>"
      return diff_html
    end

    # Unified diff; with no previous content the new value is used as-is.
    def get_differ(previous, new)
      return new unless previous

      return Test::Unit::Diff.unified(previous, new)
    end
  end

  # Returns the diff string when content changed, nil otherwise. Caches the
  # differ in @diffed for generate_html_content to render later.
  def get_new(previous_content = nil)
    new_stuff = nil
    @content = get_content()
    unless @content
      return nil
    end

    if @content != previous_content
      @diffed = get_differ(previous_content, @content)
      new_stuff = @diffed.to_s
    end
    return new_stuff
  end
end
|
|
476
|
+
|
|
477
|
+
class Articles < Site
|
|
478
|
+
# Articles sites accumulate items in an Array of Hashes.
def initialize
  super
  @content = []
end

# Ensures an item carries a usable String "id" key; raises otherwise.
def validate(item)
  raise StandardError, "Needs at least \"id\" key" unless item["id"]

  id = item["id"]
  raise StandardError, "\"id\" key needs to be a String and not #{id.class}" unless id.is_a?(String)
end

# Appends a scraped article, stamping it and skipping ids already present.
def add_article(item)
  logger.debug "Found article #{item['id']}"
  validate(item)
  item["_timestamp"] = Time.now.to_i
  known_ids = @content.map { |article| article['id'] }
  @content << item unless known_ids.include?(item['id'])
end
|
|
496
|
+
|
|
497
|
+
# Returns the articles not present in previous_content (matched by "id"),
# or nil when there is nothing new. Mutates @content so that only the new
# items remain for alerting.
def get_new(previous_content)
  get_content()
  return nil unless @content

  fresh = if previous_content
            seen_ids = previous_content.map { |h| h["id"] }
            @content.delete_if { |item| seen_ids.include?(item["id"]) }
          else
            @content
          end
  return nil if fresh.nil? || fresh.empty?

  fresh
end
|
|
516
|
+
|
|
517
|
+
# Articles-specific state persistence: instead of replacing "content" (as
# the base class does), new articles are *appended* to the stored list so
# previously-seen ids keep being deduplicated across runs.
def update_state_file(hash)
  # Pull "content" out so the generic merge below can't overwrite the
  # accumulated article list.
  hash_content = hash["content"]
  hash.delete("content")
  previous_state = load_state_file()
  previous_state.update({
    "time" => Time.now.to_i,
    "url" => @url,
    "wait" => @wait
  })
  # Hash#update returns the receiver, so `state` and `previous_state` are
  # the SAME object: the concat below also mutates `state` before saving.
  state = previous_state.update(hash)
  if hash_content
    (previous_state["content"] ||= []).concat(hash_content)
  end
  save_state_file(state)
end
|
|
532
|
+
|
|
533
|
+
# HTML e-mail body for article sites: an unstyled list with one <li> per
# article, optionally linked and illustrated with its thumbnail.
def generate_html_content()
  message_html = Site::HTML_HEADER.dup
  message_html << "<ul style='list-style-type: none;'>\n"
  @content.each do |item|
    piece = "<li id='#{item['id']}'>"
    piece += "<a href='#{item['url']}'>" if item["url"]
    piece += "<img style='width:100px' src='#{item['img_src']}'/>" if item["img_src"]
    piece += item['title'].to_s if item["title"]
    piece += "</a>" if item["url"]
    piece += "</li>\n"
    message_html += piece
  end
  message_html + "</ul>"
end
|
|
556
|
+
|
|
557
|
+
# One Telegram message piece per article: "title: url" when both exist,
# otherwise whichever of the two is present.
#
# Fixed: when an item had both a title and a url, the url was appended
# twice — once inside the title branch and once more unconditionally —
# producing messages like "Title: url: url". Each piece now carries the
# url exactly once.
def generate_telegram_message_pieces()
  @content.map do |item|
    title = item["title"]
    url = item["url"]
    if title && url
      "#{title}: #{url}"
    else
      title || url
    end
  end
end
|
|
574
|
+
end
|
|
575
|
+
end
|
data/lib/webwatchr.rb
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require "webwatchr/main"
|
data/tests/helpers.rb
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
require "fileutils"
|
|
2
|
+
require "tmpdir"
|
|
3
|
+
require "test/unit"
|
|
4
|
+
|
|
5
|
+
require_relative "../lib/webwatchr/alerting"
|
|
6
|
+
|
|
7
|
+
# Shared Test::Unit base class for site tests: provides a helper that runs
# a site's full update cycle against throwaway cache/state directories.
class ArticleSiteTest < Test::Unit::TestCase
  # Runs site.update with temporary cache/last dirs, then removes them.
  # NOTE(review): the workdir is leaked if update raises — consider ensure.
  def fakeupdate(site)
    workdir = Dir.mktmpdir('fakesite')
    cache_dir = File.join(workdir, 'cache')
    last_dir = File.join(workdir, 'last')

    FileUtils.mkdir_p(cache_dir)
    FileUtils.mkdir_p(last_dir)
    site.update(cache_dir: cache_dir, last_dir: last_dir)
    FileUtils.rm_rf(workdir)
  end
end
|
|
19
|
+
|
|
20
|
+
# In-memory alerter for the test suite: captures the content a site would
# have alerted with instead of delivering it anywhere.
class TestAlerter < Webwatchr::Alerting::Base
  IDENTIFIER = :testtest
  # The last alerted site content, for assertions; nil until alert fires.
  attr_accessor :result

  def initialize
    super()
    @result = nil
  end

  # Records the alerted site's content.
  def alert(site)
    @result = site.content
  end
end
|