webwatchr 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +42 -9
- data/lib/sites/bandcamp.rb +6 -10
- data/lib/sites/bsky.rb +6 -9
- data/lib/sites/postch.rb +4 -3
- data/lib/sites/postnl.rb +54 -0
- data/lib/sites/songkick.rb +2 -2
- data/lib/webwatchr/main.rb +5 -5
- data/lib/webwatchr/site.rb +181 -152
- data/tests/helpers.rb +7 -1
- data/tests/infra_test.rb +37 -46
- data/webwatchr.gemspec +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a2ceb7b164a65a05718c637bf20288b99bd6dc71f8e048508e1e5e45db03b351
|
|
4
|
+
data.tar.gz: 7f7356e8e9806d1cca36cae95633a0de35c5ac0881061f3dd44de415144ee3c2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5a905d1ec16cedc8e60aba00f1c1a898fd9282a1d5a159a44467feb11d22fa6f75c92df016d3cb01a40679ea8a91dbf5a459e9ed6592f4417ddb2ad0b6c3c113
|
|
7
|
+
data.tar.gz: 6b47cc97bc17c24b1589b1fc0ecb1ea51b6eef14e2c01a77cecc2583df6d21f6ed36cf98d98fc237cccd7909a27e56b3b5a58dc067b4c2667afe5fd912cdec8a
|
data/README.md
CHANGED
|
@@ -6,7 +6,8 @@ Silly script to periodically check webpage changes.
|
|
|
6
6
|
2. pulls data for every Website to check, if the last time we did that is long ago
|
|
7
7
|
4. if content is different, from the last time, alerts you with the new content (email, telegram)
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
10
11
|
|
|
11
12
|
```shell
|
|
12
13
|
|
|
@@ -29,7 +30,7 @@ class SomeSimpleSite < Site::SimpleString
|
|
|
29
30
|
# Implement this function, to return what you want to compare every run
|
|
30
31
|
def get_content
|
|
31
32
|
res = ""
|
|
32
|
-
@
|
|
33
|
+
@parsed_html.css("div.shop-main a").map do |a|
|
|
33
34
|
url = "https://somesimplesite.com/shop/#{a['href']}"
|
|
34
35
|
if a.css('img')[0]['src'] == "soldout.png"
|
|
35
36
|
next
|
|
@@ -81,7 +82,7 @@ Run the cron often:
|
|
|
81
82
|
*/5 * * * * cd /home/poil/my_fav_scripts/; ruby dsl.rb
|
|
82
83
|
```
|
|
83
84
|
|
|
84
|
-
|
|
85
|
+
## Supported websites
|
|
85
86
|
|
|
86
87
|
List of sites that are somewhat maintained are [listed here](https://github.com/conchyliculture/webwatchr).
|
|
87
88
|
|
|
@@ -92,6 +93,18 @@ Some examples:
|
|
|
92
93
|
* Package tracking (DHL, Colissimo, i-parcel, Royalmail, PostNL, UPS, USPS, etc.)
|
|
93
94
|
|
|
94
95
|
|
|
96
|
+
## Command line options
|
|
97
|
+
|
|
98
|
+
From `--help`:
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
Usage: ruby /home/renzokuken/scripts/webwatchr/lib/webwatchr/main.rb
|
|
102
|
+
-s, --site=SITE Run Webwatchr on one site only. It has to be the name of the class for that site.
|
|
103
|
+
-v, --verbose Be verbose (output to STDOUT instead of logfile
|
|
104
|
+
-t, --test Check website (ignoring wait time) and show what we've parsed
|
|
105
|
+
-h, --help Prints this help
|
|
106
|
+
```
|
|
107
|
+
|
|
95
108
|
## Force a site check, ignoring the 'wait' parameter
|
|
96
109
|
|
|
97
110
|
This can be useful to run a site update at a specific time/day with a crontab, instead of every specified amount of time. You can force update a website using the -s flag:
|
|
@@ -99,24 +112,44 @@ This can be useful to run a site update at a specific time/day with a crontab, i
|
|
|
99
112
|
ruby webwatchr.rb -t -s SiteClass
|
|
100
113
|
```
|
|
101
114
|
|
|
102
|
-
|
|
103
|
-
|
|
115
|
+
## FAQ
|
|
116
|
+
### Tests?
|
|
104
117
|
|
|
105
|
-
There are like like, two!
|
|
118
|
+
There are like like, two!
|
|
106
119
|
|
|
107
120
|
Run `rake`
|
|
108
121
|
|
|
109
|
-
|
|
122
|
+
### Logs ?
|
|
110
123
|
|
|
111
124
|
Call `logger`, as you would a classic `Logger` object in your `mysite.rb`.
|
|
112
125
|
|
|
113
|
-
|
|
126
|
+
### Alerting
|
|
114
127
|
|
|
115
128
|
Email is the main method of alerting, but you can also set webwatchr to talk to you on Telegram through a bot.
|
|
116
129
|
|
|
117
|
-
|
|
130
|
+
#### Email
|
|
131
|
+
|
|
132
|
+
In your Main block, add
|
|
133
|
+
|
|
134
|
+
```ruby
|
|
135
|
+
add_default_alert :email do
|
|
136
|
+
set :smtp_port, 25
|
|
137
|
+
set :smtp_server, "localhost"
|
|
138
|
+
set :from_addr, "webwatchr@domain.eu"
|
|
139
|
+
set :dest_addr, "admin@domain.eu"
|
|
140
|
+
end
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
#### Telegram
|
|
118
144
|
|
|
119
145
|
First make a bot and grab a token following the [Telegram procedure](https://core.telegram.org/bots#6-botfather).
|
|
120
146
|
|
|
121
147
|
You also need to know the `chat_id` for its discussion with you. The code in [there](https://github.com/atipugin/telegram-bot-ruby/blob/master/examples/bot.rb) can help you.
|
|
122
148
|
|
|
149
|
+
then in your Main block, add
|
|
150
|
+
|
|
151
|
+
```ruby
|
|
152
|
+
add_default_alert :telegram do
|
|
153
|
+
set :token, "12345:LONGTOKEN09876543"
|
|
154
|
+
set :chat_id, 1234567890
|
|
155
|
+
end
|
data/lib/sites/bandcamp.rb
CHANGED
|
@@ -10,12 +10,12 @@ class BandcampMerch < Site::Articles
|
|
|
10
10
|
self
|
|
11
11
|
end
|
|
12
12
|
|
|
13
|
-
def
|
|
14
|
-
if @
|
|
13
|
+
def extract_articles()
|
|
14
|
+
if @website_html =~ /You are being redirected, please follow <a href="([^"]+)"/
|
|
15
15
|
new_url = ::Regexp.last_match(1)
|
|
16
|
-
@
|
|
17
|
-
@
|
|
18
|
-
item = @
|
|
16
|
+
@website_html = Net::HTTP.get(URI.parse(new_url))
|
|
17
|
+
@parsed_html = Nokogiri::HTML.parse(@website_html)
|
|
18
|
+
item = @parsed_html.css('div#merch-item')
|
|
19
19
|
if item.css(".notable").text == "Sold Out"
|
|
20
20
|
logger.debug "That item is sold out =("
|
|
21
21
|
return
|
|
@@ -30,11 +30,7 @@ class BandcampMerch < Site::Articles
|
|
|
30
30
|
"title" => title
|
|
31
31
|
})
|
|
32
32
|
else
|
|
33
|
-
|
|
34
|
-
f.write(@html_content)
|
|
35
|
-
f.close
|
|
36
|
-
|
|
37
|
-
@parsed_content.css('ol.merch-grid li').each do |xx|
|
|
33
|
+
@parsed_html.css('ol.merch-grid li').each do |xx|
|
|
38
34
|
unless xx.css('p.sold-out').empty?
|
|
39
35
|
logger.debug "That item is sold out =("
|
|
40
36
|
next
|
data/lib/sites/bsky.rb
CHANGED
|
@@ -107,14 +107,11 @@ class BskyAccount < BskyBase
|
|
|
107
107
|
did = _profile_to_did(@account)
|
|
108
108
|
path = "/xrpc/app.bsky.feed.getAuthorFeed?actor=#{did}&filter=posts_and_author_threads&limit=30"
|
|
109
109
|
resp = _api_get(path)
|
|
110
|
-
@
|
|
111
|
-
f = File.open("/tmp/qsd", 'w')
|
|
112
|
-
f.write(resp.body)
|
|
113
|
-
f.close
|
|
110
|
+
@parsed_json = JSON.parse(resp.body)
|
|
114
111
|
end
|
|
115
112
|
|
|
116
|
-
def
|
|
117
|
-
@
|
|
113
|
+
def extract_articles
|
|
114
|
+
@parsed_json['feed'].each do |p|
|
|
118
115
|
post = p['post']
|
|
119
116
|
text = post['record']['text']
|
|
120
117
|
next if @regex and (text !~ @regex)
|
|
@@ -165,11 +162,11 @@ class BskySearch < BskyBase
|
|
|
165
162
|
|
|
166
163
|
params = { "q" => "#danemark", "limit" => 30, "sort" => "top" }
|
|
167
164
|
resp = _api_get("/xrpc/app.bsky.feed.searchPosts", params: params.to_a, headers: headers)
|
|
168
|
-
@
|
|
165
|
+
@parsed_json = JSON.parse(resp.body)
|
|
169
166
|
end
|
|
170
167
|
|
|
171
|
-
def
|
|
172
|
-
@
|
|
168
|
+
def extract_articles
|
|
169
|
+
@parsed_json['posts'].each do |post|
|
|
173
170
|
add_article(_article_from_post(post))
|
|
174
171
|
end
|
|
175
172
|
end
|
data/lib/sites/postch.rb
CHANGED
|
@@ -16,6 +16,7 @@ class PostCH < Site::SimpleString
|
|
|
16
16
|
@mechanize = Mechanize.new()
|
|
17
17
|
@mechanize.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:132.0) Gecko/20100101 Firefox/132.0'
|
|
18
18
|
@text_messages = {}
|
|
19
|
+
@parsed_json = nil
|
|
19
20
|
end
|
|
20
21
|
|
|
21
22
|
def code_to_message(code)
|
|
@@ -69,11 +70,11 @@ class PostCH < Site::SimpleString
|
|
|
69
70
|
resp = @mechanize.get("https://service.post.ch/ekp-web/api/shipment/id/#{identity}/events", nil, nil, headers)
|
|
70
71
|
|
|
71
72
|
json_content = JSON.parse(resp.body)
|
|
72
|
-
@
|
|
73
|
+
@parsed_json = []
|
|
73
74
|
|
|
74
75
|
json_content.each do |event|
|
|
75
76
|
event['description'] = code_to_message(event['eventCode'])
|
|
76
|
-
@
|
|
77
|
+
@parsed_json << event
|
|
77
78
|
end
|
|
78
79
|
end
|
|
79
80
|
|
|
@@ -89,7 +90,7 @@ class PostCH < Site::SimpleString
|
|
|
89
90
|
end
|
|
90
91
|
|
|
91
92
|
def get_content()
|
|
92
|
-
evs = @
|
|
93
|
+
evs = @parsed_json.map { |e|
|
|
93
94
|
e['timestamp'] = DateTime.strptime(e['timestamp'], "%Y-%m-%dT%H:%M:%S%Z")
|
|
94
95
|
e
|
|
95
96
|
}
|
data/lib/sites/postnl.rb
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
require_relative "../webwatchr/site"
|
|
2
|
+
|
|
3
|
+
# example:
|
|
4
|
+
#
|
|
5
|
+
# update PostNL do
|
|
6
|
+
# track_id "XX102917683NL"
|
|
7
|
+
# end
|
|
8
|
+
|
|
9
|
+
class PostNL < Site::SimpleString
|
|
10
|
+
require "net/http"
|
|
11
|
+
require "json"
|
|
12
|
+
|
|
13
|
+
def track_id(track_id)
|
|
14
|
+
# Sets the Track ID & URL
|
|
15
|
+
@track_id = track_id
|
|
16
|
+
@url = "https://www.postnl.post/track?barcodes=#{track_id}"
|
|
17
|
+
self
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def initialize
|
|
21
|
+
super()
|
|
22
|
+
@parsed_json = nil
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def pull_things()
|
|
26
|
+
resp = Net::HTTP.post(URI.parse("https://postnl.post/api/v1/auth/token"), nil, nil)
|
|
27
|
+
token = JSON.parse(resp.body)["access_token"]
|
|
28
|
+
|
|
29
|
+
resp = Net::HTTP.post(
|
|
30
|
+
URI.parse("https://postnl.post/api/v1/tracking-items"), {
|
|
31
|
+
"items" => ["CH188699083NL"],
|
|
32
|
+
"language_code" => "en"
|
|
33
|
+
}.to_json,
|
|
34
|
+
{
|
|
35
|
+
'Content-Type' => 'application/json',
|
|
36
|
+
'Authorization' => "Bearer #{token}"
|
|
37
|
+
}
|
|
38
|
+
)
|
|
39
|
+
@parsed_json = JSON.parse(resp.body)
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def extract_content()
|
|
43
|
+
res = []
|
|
44
|
+
@parsed_json['data']['items'][0]['events'].each do |event|
|
|
45
|
+
msg = "#{event['datetime_local']}: #{event['status_description']}"
|
|
46
|
+
if event['country_code']
|
|
47
|
+
msg << " (#{event['country_code']})"
|
|
48
|
+
end
|
|
49
|
+
res << msg
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
return ResultObject.new(res.join(""))
|
|
53
|
+
end
|
|
54
|
+
end
|
data/lib/sites/songkick.rb
CHANGED
|
@@ -9,8 +9,8 @@ class Songkick < Site::Articles
|
|
|
9
9
|
return self
|
|
10
10
|
end
|
|
11
11
|
|
|
12
|
-
def
|
|
13
|
-
events = @
|
|
12
|
+
def extract_articles()
|
|
13
|
+
events = @parsed_html.css('ol.event-listings')[0]
|
|
14
14
|
events.css('li').each do |event|
|
|
15
15
|
j = JSON.parse(event.css('script')[0].text)[0]
|
|
16
16
|
date = j["startDate"]
|
data/lib/webwatchr/main.rb
CHANGED
|
@@ -20,20 +20,20 @@ module Webwatchr
|
|
|
20
20
|
OptionParser.new { |o|
|
|
21
21
|
o.banner = "WebWatchr is a script to poll websites and alert on changes.
|
|
22
22
|
Exemple uses:
|
|
23
|
-
* Updates all
|
|
23
|
+
* Updates all registered Sites, and compare against internal state, and update it.
|
|
24
24
|
ruby #{__FILE__}
|
|
25
|
-
* Updates
|
|
26
|
-
ruby #{__FILE__} -s
|
|
25
|
+
* Updates one specific Site, lue, and compare against internal state, and update it.
|
|
26
|
+
ruby #{__FILE__} -s SiteClass
|
|
27
27
|
|
|
28
28
|
Usage: ruby #{__FILE__} "
|
|
29
|
-
o.on("-sSITE", "--site=SITE", "Run
|
|
29
|
+
o.on("-sSITE", "--site=SITE", "Run Webwatchr on one site only. It has to be the name of the class for that site.") do |val|
|
|
30
30
|
PARAMS[:site] = val
|
|
31
31
|
PARAMS[:mode] = :single
|
|
32
32
|
end
|
|
33
33
|
o.on("-v", "--verbose", "Be verbose (output to STDOUT instead of logfile") do
|
|
34
34
|
PARAMS[:verbose] = true
|
|
35
35
|
end
|
|
36
|
-
o.on("-t", "--test", "Check website and
|
|
36
|
+
o.on("-t", "--test", "Check website (ignoring wait time) and show what we've parsed") do
|
|
37
37
|
PARAMS[:test] = true
|
|
38
38
|
end
|
|
39
39
|
o.on("-h", "--help", "Prints this help") {
|
data/lib/webwatchr/site.rb
CHANGED
|
@@ -6,6 +6,22 @@ require "net/http"
|
|
|
6
6
|
require "nokogiri"
|
|
7
7
|
require_relative "./logger"
|
|
8
8
|
|
|
9
|
+
# Base class for a Site to be watched
|
|
10
|
+
#
|
|
11
|
+
# Handles pulling data from websites as well as storing the state and when to update next.
|
|
12
|
+
#
|
|
13
|
+
# == Overview
|
|
14
|
+
#
|
|
15
|
+
# - update() is called, which loads the saved state file
|
|
16
|
+
# - do_stuff() is called and checks whether or not we should update (aka: if the last time was long enough ago)
|
|
17
|
+
# - if it is time, we call pull_things(), which can be overloaded, but by default just ;
|
|
18
|
+
# - fetches @url, and stores it in @website_html
|
|
19
|
+
# - parses @website_html, with Nokogiri, into @parsed_html
|
|
20
|
+
# - calls extract_content(), which is the method that extract what we are interested in the webpage.
|
|
21
|
+
# Its results will get compared with the previous execution's results.
|
|
22
|
+
# This is the one you should reimplement at the very least (unless you want to compare against the whole HTML body).
|
|
23
|
+
# - get_diff() is the method that will do the comparison, and its return value, if not nil, will trigger alerting
|
|
24
|
+
# - Each Alerter object in @alerters will be called, if needed.
|
|
9
25
|
class Site
|
|
10
26
|
include Loggable
|
|
11
27
|
class ParseError < StandardError
|
|
@@ -17,7 +33,7 @@ class Site
|
|
|
17
33
|
HTML_HEADER = "<!DOCTYPE html>\n<meta charset=\"utf-8\">\n".freeze
|
|
18
34
|
DEFAULT_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'.freeze
|
|
19
35
|
|
|
20
|
-
attr_accessor :url, :alerters, :rand_sleep, :
|
|
36
|
+
attr_accessor :url, :alerters, :rand_sleep, :update_interval, :lastdir, :cache_dir, :state_file, :comment
|
|
21
37
|
|
|
22
38
|
attr_writer :name
|
|
23
39
|
|
|
@@ -55,7 +71,11 @@ class Site
|
|
|
55
71
|
@http_ver = 1
|
|
56
72
|
@rand_sleep = 0
|
|
57
73
|
@did_stuff = false
|
|
58
|
-
@
|
|
74
|
+
@update_interval = 3600
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def display_optional_state
|
|
78
|
+
puts "We parsed the website and extracted content #{@content}"
|
|
59
79
|
end
|
|
60
80
|
|
|
61
81
|
def set_http_header(key, value)
|
|
@@ -85,18 +105,21 @@ class Site
|
|
|
85
105
|
end
|
|
86
106
|
|
|
87
107
|
def generate_html_content()
|
|
88
|
-
|
|
108
|
+
raise StandardError, "We called generate_html_content, but there is no @content" unless @content
|
|
89
109
|
|
|
90
110
|
message_html = Site::HTML_HEADER.dup
|
|
91
111
|
message_html += @content
|
|
92
112
|
return message_html
|
|
93
113
|
end
|
|
94
114
|
|
|
95
|
-
# Helper methods to generate Telegram
|
|
115
|
+
# Helper methods to generate Telegram messages
|
|
96
116
|
def generate_telegram_message_pieces()
|
|
117
|
+
raise StandardError, "We called generate_telegram_message_pieces, but there is no @content" unless @content
|
|
118
|
+
|
|
97
119
|
return [@content]
|
|
98
120
|
end
|
|
99
121
|
|
|
122
|
+
# Uses Curb to query websites with HTTP/2
|
|
100
123
|
def fetch_url2(url)
|
|
101
124
|
require "curb"
|
|
102
125
|
|
|
@@ -153,7 +176,7 @@ class Site
|
|
|
153
176
|
end
|
|
154
177
|
response = http.request(req)
|
|
155
178
|
case response.code
|
|
156
|
-
when "301", "302"
|
|
179
|
+
when "301", "302", "303"
|
|
157
180
|
if max_redir == 0
|
|
158
181
|
raise Site::RedirectError
|
|
159
182
|
end
|
|
@@ -193,10 +216,6 @@ class Site
|
|
|
193
216
|
return html
|
|
194
217
|
end
|
|
195
218
|
|
|
196
|
-
def parse_content(html)
|
|
197
|
-
return parse_noko(html)
|
|
198
|
-
end
|
|
199
|
-
|
|
200
219
|
def parse_noko(html)
|
|
201
220
|
noko = Nokogiri::HTML(html)
|
|
202
221
|
meta = noko.css("meta")
|
|
@@ -224,13 +243,9 @@ class Site
|
|
|
224
243
|
end
|
|
225
244
|
end
|
|
226
245
|
|
|
246
|
+
# Takes the old state file, and updates it with the values passed in hash
|
|
227
247
|
def update_state_file(hash)
|
|
228
248
|
previous_state = load_state_file()
|
|
229
|
-
previous_state.update({
|
|
230
|
-
"time" => Time.now.to_i,
|
|
231
|
-
"url" => @url,
|
|
232
|
-
"wait" => @wait
|
|
233
|
-
})
|
|
234
249
|
state = previous_state.update(hash)
|
|
235
250
|
save_state_file(state)
|
|
236
251
|
end
|
|
@@ -238,20 +253,10 @@ class Site
|
|
|
238
253
|
def alert()
|
|
239
254
|
logger.debug "Alerting new stuff"
|
|
240
255
|
@alerters.each do |alerter|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
def content()
|
|
246
|
-
unless @did_stuff
|
|
247
|
-
raise StandardError, 'Trying to access @content, but we have not pulled any data yet'
|
|
256
|
+
if @alert_only.empty? or @alert_only.include?(alerter.class::IDENTIFIER)
|
|
257
|
+
alerter.alert(self)
|
|
258
|
+
end
|
|
248
259
|
end
|
|
249
|
-
|
|
250
|
-
return @content
|
|
251
|
-
end
|
|
252
|
-
|
|
253
|
-
def get_content()
|
|
254
|
-
return @html_content
|
|
255
260
|
end
|
|
256
261
|
|
|
257
262
|
def alert_only(alerter_identifiers)
|
|
@@ -262,14 +267,11 @@ class Site
|
|
|
262
267
|
else
|
|
263
268
|
raise StandardError, "unknown type of provided alerter identifier #{alerter_identifiers}"
|
|
264
269
|
end
|
|
270
|
+
self
|
|
265
271
|
end
|
|
266
272
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
end
|
|
270
|
-
|
|
271
|
-
def get_new(_previous_content = nil)
|
|
272
|
-
@content = get_content()
|
|
273
|
+
# This method compares the previous stored content, with the new one, and returns what is new.
|
|
274
|
+
def get_diff()
|
|
273
275
|
return @content
|
|
274
276
|
end
|
|
275
277
|
|
|
@@ -279,81 +281,83 @@ class Site
|
|
|
279
281
|
md5 = Digest::MD5.hexdigest(@url)
|
|
280
282
|
@cache_dir = File.join(cache_dir, "cache-#{URI.parse(@url).hostname}-#{md5}")
|
|
281
283
|
@state_file = File.join(last_dir, "last-#{URI.parse(@url).hostname}-#{md5}")
|
|
282
|
-
state = load_state_file()
|
|
283
|
-
@wait = @every || state["wait"] || 60 * 60
|
|
284
284
|
@test = test
|
|
285
285
|
logger.debug "using #{@state_file} to store updates, and #{@cache_dir} for Cache"
|
|
286
286
|
|
|
287
287
|
do_stuff()
|
|
288
288
|
rescue Site::RedirectError
|
|
289
289
|
msg = "Error parsing page #{@url}, too many redirects"
|
|
290
|
-
msg += ". Will retry in #{@
|
|
290
|
+
msg += ". Will retry in #{@update_interval} + 30 minutes"
|
|
291
291
|
logger.error msg
|
|
292
292
|
warn msg
|
|
293
|
-
update_state_file({ "
|
|
293
|
+
update_state_file({ "time" => Time.now.to_i, "wait_at_least" => @update_interval + 30 * 60 })
|
|
294
294
|
rescue Site::ParseError => e
|
|
295
295
|
msg = "Error parsing page #{@url}"
|
|
296
296
|
if e.message
|
|
297
297
|
msg += " with error : #{e.message}"
|
|
298
298
|
end
|
|
299
|
-
msg += ". Will retry in #{@
|
|
299
|
+
msg += ". Will retry in #{@update_interval} + 30 minutes"
|
|
300
300
|
logger.error msg
|
|
301
301
|
warn msg
|
|
302
|
-
update_state_file({ "
|
|
302
|
+
update_state_file({ "time" => Time.now.to_i, "wait_at_least" => @update_interval + 30 * 60 })
|
|
303
303
|
rescue Errno::ECONNREFUSED, Net::ReadTimeout, OpenSSL::SSL::SSLError, Net::OpenTimeout => e
|
|
304
304
|
msg = "Network error on #{@url}"
|
|
305
305
|
if e.message
|
|
306
306
|
msg += " : #{e.message}"
|
|
307
307
|
end
|
|
308
|
-
msg += ". Will retry in #{@
|
|
308
|
+
msg += ". Will retry in #{@update_interval} + 30 minutes"
|
|
309
309
|
logger.error msg
|
|
310
310
|
warn msg
|
|
311
|
-
update_state_file({ "
|
|
311
|
+
update_state_file({ "time" => Time.now.to_i, "wait_at_least" => @update_interval + 30 * 60 })
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
def extract_content()
|
|
315
|
+
return @website_html
|
|
312
316
|
end
|
|
313
317
|
|
|
318
|
+
# By default, we pull html from the @url, we parse it with Nokogiri
|
|
314
319
|
def pull_things()
|
|
315
|
-
@
|
|
316
|
-
@
|
|
320
|
+
@website_html = fetch_url(@url)
|
|
321
|
+
@parsed_html = parse_noko(@website_html)
|
|
322
|
+
@content = extract_content()
|
|
317
323
|
end
|
|
318
324
|
|
|
319
325
|
def do_stuff()
|
|
320
|
-
|
|
326
|
+
# Prepare previous_state, with defaults, that can be overriden with what we may find in the state_file
|
|
321
327
|
previous_state = {
|
|
322
328
|
"time" => -9_999_999_999_999,
|
|
323
329
|
"content" => nil
|
|
324
330
|
}
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
331
|
+
old_state = load_state_file()
|
|
332
|
+
delay_between_updates = old_state["wait_at_least"] || @update_interval || 60
|
|
333
|
+
if old_state
|
|
334
|
+
previous_state.update(old_state)
|
|
328
335
|
end
|
|
329
|
-
|
|
330
|
-
if
|
|
336
|
+
|
|
337
|
+
if @test or (Time.now().to_i >= previous_state['time'] + delay_between_updates)
|
|
331
338
|
if @rand_sleep > 0 and not @test
|
|
332
339
|
logger.info "Time to update #{@url} (sleeping #{@rand_sleep} sec)"
|
|
333
340
|
sleep(@rand_sleep)
|
|
334
341
|
else
|
|
335
342
|
logger.info "Time to update #{@url}"
|
|
336
343
|
end
|
|
344
|
+
|
|
337
345
|
pull_things()
|
|
338
|
-
|
|
346
|
+
|
|
347
|
+
new_stuff = get_diff()
|
|
339
348
|
@did_stuff = true
|
|
340
349
|
if new_stuff
|
|
341
350
|
if @test
|
|
342
351
|
logger.info "Would have alerted with new stuff:\n#{new_stuff}"
|
|
343
352
|
else
|
|
344
353
|
alert()
|
|
345
|
-
update_state_file({
|
|
346
|
-
"content" => new_stuff,
|
|
347
|
-
"previous_content" => previous_content
|
|
348
|
-
})
|
|
349
354
|
end
|
|
350
355
|
else
|
|
351
356
|
logger.info "Nothing new for #{@url}"
|
|
352
357
|
if @test
|
|
353
|
-
|
|
358
|
+
display_optional_state()
|
|
354
359
|
end
|
|
355
360
|
end
|
|
356
|
-
update_state_file({}) unless @test
|
|
357
361
|
else
|
|
358
362
|
@did_stuff = true
|
|
359
363
|
logger.info "Too soon to update #{@url}"
|
|
@@ -397,15 +401,18 @@ class Site
|
|
|
397
401
|
end
|
|
398
402
|
end
|
|
399
403
|
|
|
400
|
-
def
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
raise StandardError, "The result of get_content() should be a ResultObject if the Site class is SimpleString" unless @content.class < ResultObject
|
|
404
|
-
else
|
|
405
|
-
@content = get_content()
|
|
406
|
-
end
|
|
404
|
+
def get_diff()
|
|
405
|
+
@content ||= extract_content()
|
|
406
|
+
previous_content = load_state_file()["content"]
|
|
407
407
|
return nil if @content == previous_content
|
|
408
408
|
|
|
409
|
+
update_state_file(
|
|
410
|
+
{
|
|
411
|
+
"time" => Time.now.to_i,
|
|
412
|
+
"wait_at_least" => @update_interval,
|
|
413
|
+
"content" => @content
|
|
414
|
+
}
|
|
415
|
+
)
|
|
409
416
|
return @content
|
|
410
417
|
end
|
|
411
418
|
|
|
@@ -426,87 +433,118 @@ class Site
|
|
|
426
433
|
end
|
|
427
434
|
end
|
|
428
435
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
diff_html
|
|
437
|
-
diff_html += "
|
|
438
|
-
diff_html +=
|
|
439
|
-
diff_html += "</body
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
diff_html
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
return
|
|
459
|
-
|
|
436
|
+
## For use when you want to parse a site, and are only interested is having
|
|
437
|
+
# a nice looking "Diff" between the new and the previous state
|
|
438
|
+
# class DiffString < SimpleString
|
|
439
|
+
# begin
|
|
440
|
+
# require "diffy"
|
|
441
|
+
#
|
|
442
|
+
# def generate_html_content()
|
|
443
|
+
# diff_html = Site::HTML_HEADER.dup
|
|
444
|
+
# diff_html += "<head><style>"
|
|
445
|
+
# diff_html += Diffy::CSS
|
|
446
|
+
# diff_html += "</style><body>"
|
|
447
|
+
# diff_html += @diffed.to_s(:html)
|
|
448
|
+
# diff_html += "</body></html>"
|
|
449
|
+
# return diff_html
|
|
450
|
+
# end
|
|
451
|
+
#
|
|
452
|
+
# def get_differ(previous, new)
|
|
453
|
+
# return Diffy::Diff.new(previous, new)
|
|
454
|
+
# end
|
|
455
|
+
# rescue LoadError
|
|
456
|
+
# require "test/unit/diff"
|
|
457
|
+
# def generate_html_content()
|
|
458
|
+
# diff_html = Site::HTML_HEADER.dup
|
|
459
|
+
# diff_html += @diffed.to_s
|
|
460
|
+
# diff_html += "</body></html>"
|
|
461
|
+
# return diff_html
|
|
462
|
+
# end
|
|
463
|
+
#
|
|
464
|
+
# def get_differ(previous, new)
|
|
465
|
+
# return new unless previous
|
|
466
|
+
#
|
|
467
|
+
# return Test::Unit::Diff.unified(previous, new)
|
|
468
|
+
# end
|
|
469
|
+
# end
|
|
470
|
+
#
|
|
471
|
+
# def get_diff()
|
|
472
|
+
# new_stuff = nil
|
|
473
|
+
# @content = extract_content()
|
|
474
|
+
# unless @content
|
|
475
|
+
# return nil
|
|
476
|
+
# end
|
|
477
|
+
#
|
|
478
|
+
# if @content != previous_content
|
|
479
|
+
# @diffed = get_differ(previous_content, @content)
|
|
480
|
+
# new_stuff = @diffed.to_s
|
|
481
|
+
# end
|
|
482
|
+
# return new_stuff
|
|
483
|
+
# end
|
|
484
|
+
# end
|
|
485
|
+
|
|
486
|
+
## For use when you want to parse a site that has Articles
|
|
487
|
+
# And you want to know when knew, previously unseen Articles appear.
|
|
488
|
+
# For example, a shop.
|
|
489
|
+
#
|
|
490
|
+
# You need to make sure to call add_article() with instances of Article.
|
|
491
|
+
class Articles < Site
|
|
492
|
+
class Article < Hash
|
|
460
493
|
end
|
|
461
494
|
|
|
462
|
-
def
|
|
463
|
-
|
|
464
|
-
@
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
end
|
|
495
|
+
def initialize
|
|
496
|
+
super
|
|
497
|
+
@articles = []
|
|
498
|
+
@found_articles = 0
|
|
499
|
+
end
|
|
468
500
|
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
end
|
|
473
|
-
return new_stuff
|
|
501
|
+
def content
|
|
502
|
+
log.error("Do not use site.content on an instance of Site::Articles in #{caller}")
|
|
503
|
+
return @articles
|
|
474
504
|
end
|
|
475
|
-
end
|
|
476
505
|
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
super
|
|
480
|
-
@content = []
|
|
506
|
+
def display_optional_state
|
|
507
|
+
puts "We parsed the website and extracted #{@found_articles} articles"
|
|
481
508
|
end
|
|
482
509
|
|
|
483
|
-
def validate(
|
|
484
|
-
|
|
510
|
+
def validate(article)
|
|
511
|
+
id = article['id']
|
|
512
|
+
raise StandardError, "Article needs an \"id\", which is used as identifier" unless id
|
|
485
513
|
|
|
486
|
-
id = item["id"]
|
|
487
514
|
raise StandardError, "\"id\" key needs to be a String and not #{id.class}" unless id.is_a?(String)
|
|
488
515
|
end
|
|
489
516
|
|
|
490
|
-
def add_article(
|
|
491
|
-
logger.debug "Found article #{
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
517
|
+
def add_article(article)
|
|
518
|
+
logger.debug "Found article #{article['id']}"
|
|
519
|
+
@found_articles += 1
|
|
520
|
+
validate(article)
|
|
521
|
+
article['_timestamp'] = Time.now().to_i
|
|
522
|
+
@articles << article unless @articles.map { |art| art['id'] }.include?(article['id'])
|
|
523
|
+
end
|
|
524
|
+
|
|
525
|
+
def extract_articles()
|
|
526
|
+
raise StandardError, "Please implement extract_articles(). Use @parsed_html and call add_article()."
|
|
495
527
|
end
|
|
496
528
|
|
|
497
|
-
def
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
unless @content
|
|
529
|
+
def get_diff()
|
|
530
|
+
extract_articles()
|
|
531
|
+
unless @articles
|
|
501
532
|
return nil
|
|
502
533
|
end
|
|
503
534
|
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
new_stuff = @
|
|
535
|
+
new_stuff = @articles
|
|
536
|
+
previous_articles = load_state_file()["articles"]
|
|
537
|
+
if previous_articles
|
|
538
|
+
previous_ids = previous_articles.map { |art| art['id'] }
|
|
539
|
+
new_stuff = @articles.delete_if { |article| previous_ids.include?(article['id']) }
|
|
509
540
|
end
|
|
541
|
+
update_state_file(
|
|
542
|
+
{
|
|
543
|
+
"time" => Time.now.to_i,
|
|
544
|
+
"wait_at_least" => @update_interval,
|
|
545
|
+
"articles" => (previous_articles || []).concat(@articles)
|
|
546
|
+
}
|
|
547
|
+
)
|
|
510
548
|
if (not new_stuff) or new_stuff.empty?
|
|
511
549
|
return nil
|
|
512
550
|
end
|
|
@@ -514,37 +552,28 @@ class Site
|
|
|
514
552
|
return new_stuff
|
|
515
553
|
end
|
|
516
554
|
|
|
555
|
+
# Here we want to store every article we ever found
|
|
517
556
|
def update_state_file(hash)
|
|
518
|
-
hash_content = hash["content"]
|
|
519
|
-
hash.delete("content")
|
|
520
557
|
previous_state = load_state_file()
|
|
521
|
-
previous_state.update({
|
|
522
|
-
"time" => Time.now.to_i,
|
|
523
|
-
"url" => @url,
|
|
524
|
-
"wait" => @wait
|
|
525
|
-
})
|
|
526
558
|
state = previous_state.update(hash)
|
|
527
|
-
if hash_content
|
|
528
|
-
(previous_state["content"] ||= []).concat(hash_content)
|
|
529
|
-
end
|
|
530
559
|
save_state_file(state)
|
|
531
560
|
end
|
|
532
561
|
|
|
533
562
|
def generate_html_content()
|
|
534
563
|
message_html = Site::HTML_HEADER.dup
|
|
535
564
|
message_html << "<ul style='list-style-type: none;'>\n"
|
|
536
|
-
@
|
|
537
|
-
msg = "<li id='#{
|
|
538
|
-
if
|
|
539
|
-
msg += "<a href='#{
|
|
565
|
+
@articles.each do |article|
|
|
566
|
+
msg = "<li id='#{article['id']}'>"
|
|
567
|
+
if article['url']
|
|
568
|
+
msg += "<a href='#{article['url']}'>"
|
|
540
569
|
end
|
|
541
|
-
if
|
|
542
|
-
msg += "<img style='width:100px' src='#{
|
|
570
|
+
if article["img_src"]
|
|
571
|
+
msg += "<img style='width:100px' src='#{article['img_src']}'/>"
|
|
543
572
|
end
|
|
544
|
-
if
|
|
545
|
-
msg +=
|
|
573
|
+
if article["title"]
|
|
574
|
+
msg += article['title'].to_s
|
|
546
575
|
end
|
|
547
|
-
if
|
|
576
|
+
if article["url"]
|
|
548
577
|
msg += "</a>"
|
|
549
578
|
end
|
|
550
579
|
msg += "</li>\n"
|
|
@@ -556,16 +585,16 @@ class Site
|
|
|
556
585
|
|
|
557
586
|
def generate_telegram_message_pieces()
|
|
558
587
|
msg_pieces = []
|
|
559
|
-
@
|
|
560
|
-
line =
|
|
561
|
-
if
|
|
588
|
+
@articles.each do |article|
|
|
589
|
+
line = article["title"]
|
|
590
|
+
if article["url"]
|
|
562
591
|
if line
|
|
563
|
-
line += ": #{
|
|
592
|
+
line += ": #{article['url']}"
|
|
564
593
|
else
|
|
565
|
-
line =
|
|
594
|
+
line = article["url"]
|
|
566
595
|
end
|
|
567
596
|
|
|
568
|
-
line += ": #{
|
|
597
|
+
line += ": #{article['url']}"
|
|
569
598
|
end
|
|
570
599
|
msg_pieces << line
|
|
571
600
|
end
|
data/tests/helpers.rb
CHANGED
|
@@ -27,6 +27,12 @@ class TestAlerter < Webwatchr::Alerting::Base
|
|
|
27
27
|
end
|
|
28
28
|
|
|
29
29
|
def alert(site)
|
|
30
|
-
|
|
30
|
+
if site.is_a?(Site::Articles)
|
|
31
|
+
@result = site.articles
|
|
32
|
+
elsif site.is_a?(Site::SimpleString)
|
|
33
|
+
@result = site.content
|
|
34
|
+
else
|
|
35
|
+
raise StandardError, "Unknown Site class being tests: #{site.class}"
|
|
36
|
+
end
|
|
31
37
|
end
|
|
32
38
|
end
|
data/tests/infra_test.rb
CHANGED
|
@@ -45,6 +45,10 @@ class BaseWebrickTest < Test::Unit::TestCase
|
|
|
45
45
|
restart_webrick()
|
|
46
46
|
end
|
|
47
47
|
|
|
48
|
+
def cleanup
|
|
49
|
+
FileUtils.remove_entry_secure(@workdir) if File.directory?(@workdir)
|
|
50
|
+
end
|
|
51
|
+
|
|
48
52
|
def teardown()
|
|
49
53
|
@webrick.stop
|
|
50
54
|
@serv_thread.join
|
|
@@ -53,7 +57,6 @@ class BaseWebrickTest < Test::Unit::TestCase
|
|
|
53
57
|
f.puts ""
|
|
54
58
|
end
|
|
55
59
|
end
|
|
56
|
-
FileUtils.remove_entry_secure(@workdir)
|
|
57
60
|
end
|
|
58
61
|
end
|
|
59
62
|
|
|
@@ -61,11 +64,11 @@ class TestSimpleStringSite < BaseWebrickTest
|
|
|
61
64
|
class TestStringSite < Site::SimpleString
|
|
62
65
|
def initialize
|
|
63
66
|
super()
|
|
64
|
-
@
|
|
67
|
+
@update_interval = 200
|
|
65
68
|
end
|
|
66
69
|
|
|
67
|
-
def
|
|
68
|
-
return ResultObject.new(@
|
|
70
|
+
def extract_content()
|
|
71
|
+
return ResultObject.new(@parsed_html.css("div.content").text)
|
|
69
72
|
end
|
|
70
73
|
end
|
|
71
74
|
|
|
@@ -77,31 +80,27 @@ class TestSimpleStringSite < BaseWebrickTest
|
|
|
77
80
|
f.write whole_html
|
|
78
81
|
end
|
|
79
82
|
url = "http://localhost:#{TEST_CONFIG[:wwwport]}/#{TEST_CONFIG[:content_is_string_file]}"
|
|
80
|
-
wait = 10 * 60
|
|
81
83
|
|
|
82
84
|
c = TestStringSite.new
|
|
83
85
|
c.url = url
|
|
84
86
|
a = TestAlerter.new()
|
|
85
87
|
c.alerters = [a]
|
|
86
88
|
assert { c.load_state_file() == {} }
|
|
87
|
-
assert { c.should_update?(-9_999_999_999_999) }
|
|
88
|
-
assert { c.should_update?((Time.now() - wait + 30).to_i) == false }
|
|
89
89
|
html = c.fetch_url(url)
|
|
90
90
|
assert { whole_html == html }
|
|
91
91
|
assert { c.parse_noko(html).css("title").text == "test" }
|
|
92
92
|
cache_dir = File.join(@workdir, "cache")
|
|
93
93
|
last_dir = File.join(@workdir, ".lasts")
|
|
94
|
+
c.state_file = File.join(last_dir, "last-localhost-2182cd5c8685baed48f692ed72d7a89f")
|
|
94
95
|
FileUtils.mkdir_p(cache_dir)
|
|
95
96
|
FileUtils.mkdir_p(last_dir)
|
|
96
97
|
c.update(cache_dir: cache_dir, last_dir: last_dir)
|
|
97
|
-
assert { c.state_file.end_with?("last-localhost-2182cd5c8685baed48f692ed72d7a89f") }
|
|
98
98
|
expected_error = "DEBUG -- TestSimpleStringSite::TestStringSite: Alerting new stuff"
|
|
99
99
|
last_error = @logger_test_io.string.split("\n")[-1]
|
|
100
100
|
assert { last_error.end_with?(expected_error) }
|
|
101
101
|
first_pass_content = Site::HTML_HEADER + content_html
|
|
102
|
-
assert { c.content.to_html == content_html }
|
|
103
102
|
assert { c.generate_html_content == first_pass_content }
|
|
104
|
-
assert { a.result == c.content }
|
|
103
|
+
assert { a.result.message == c.content.message }
|
|
105
104
|
|
|
106
105
|
File.open(File.join(TEST_CONFIG[:wwwroot], TEST_CONFIG[:content_is_string_file]), "w+") do |f|
|
|
107
106
|
f.write whole_html.gsub("</div>", " new ! </div>")
|
|
@@ -121,7 +120,7 @@ class TestSimpleStringSite < BaseWebrickTest
|
|
|
121
120
|
assert { c.generate_html_content.nil? }
|
|
122
121
|
assert { c.name == url }
|
|
123
122
|
|
|
124
|
-
c.
|
|
123
|
+
c.update_state_file({ "time" => Time.now.to_i - 300 })
|
|
125
124
|
c.update(cache_dir: cache_dir, last_dir: last_dir)
|
|
126
125
|
expected_error = "DEBUG -- TestSimpleStringSite::TestStringSite: Alerting new stuff"
|
|
127
126
|
last_error = @logger_test_io.string.split("\n")[-1]
|
|
@@ -131,9 +130,10 @@ class TestSimpleStringSite < BaseWebrickTest
|
|
|
131
130
|
assert { c.name == url }
|
|
132
131
|
result_last = JSON.parse(File.read(c.state_file), create_additions: true)
|
|
133
132
|
result_last.delete("time")
|
|
134
|
-
assert { result_last["url"] == url }
|
|
135
133
|
assert { result_last["content"].message == "#{content_html} new ! " }
|
|
136
|
-
assert { result_last["
|
|
134
|
+
assert { result_last["wait_at_least"] == 200 }
|
|
135
|
+
ensure
|
|
136
|
+
cleanup
|
|
137
137
|
end
|
|
138
138
|
end
|
|
139
139
|
|
|
@@ -141,14 +141,14 @@ class TestArraySites < BaseWebrickTest
|
|
|
141
141
|
class TestArraySite < Site::Articles
|
|
142
142
|
def initialize
|
|
143
143
|
super()
|
|
144
|
-
@
|
|
144
|
+
@update_interval = 200
|
|
145
145
|
end
|
|
146
146
|
|
|
147
|
-
def
|
|
147
|
+
def extract_articles()
|
|
148
148
|
res = []
|
|
149
|
-
@
|
|
149
|
+
@parsed_html.css("div").each do |x|
|
|
150
150
|
a, b = x.text.split("-").map(&:strip)
|
|
151
|
-
add_article(
|
|
151
|
+
add_article(Article["id" => a, "url" => a, "title" => b])
|
|
152
152
|
end
|
|
153
153
|
return res
|
|
154
154
|
end
|
|
@@ -160,15 +160,12 @@ class TestArraySites < BaseWebrickTest
|
|
|
160
160
|
f.write whole_html
|
|
161
161
|
end
|
|
162
162
|
url = "http://localhost:#{TEST_CONFIG[:wwwport]}/#{TEST_CONFIG[:content_is_array_file]}"
|
|
163
|
-
wait = 10 * 60
|
|
164
163
|
|
|
165
164
|
c = TestArraySite.new
|
|
166
165
|
c.url = url
|
|
167
166
|
a = TestAlerter.new()
|
|
168
167
|
c.alerters = [a]
|
|
169
168
|
assert { c.load_state_file() == {} }
|
|
170
|
-
assert { c.should_update?(-9_999_999_999_999) }
|
|
171
|
-
assert { !c.should_update?((Time.now() - wait + 30).to_i) }
|
|
172
169
|
html = c.fetch_url(url)
|
|
173
170
|
assert { html == whole_html }
|
|
174
171
|
assert { c.parse_noko(html).css("title").text == "test" }
|
|
@@ -189,17 +186,15 @@ class TestArraySites < BaseWebrickTest
|
|
|
189
186
|
"<li id='fi'><a href='fi'>fu</a></li>",
|
|
190
187
|
"</ul>"
|
|
191
188
|
].join("\n")
|
|
192
|
-
c.
|
|
189
|
+
c.articles.each { |x| x.delete('_timestamp') }
|
|
193
190
|
assert {
|
|
194
|
-
c.
|
|
191
|
+
c.articles == [
|
|
195
192
|
{ "id" => "lol", "url" => "lol", "title" => "lilo" },
|
|
196
193
|
{ "id" => "fi", "url" => "fi", "title" => "fu" }
|
|
197
194
|
]
|
|
198
195
|
}
|
|
199
196
|
assert { c.generate_html_content == expected_html }
|
|
200
197
|
|
|
201
|
-
result = ""
|
|
202
|
-
|
|
203
198
|
File.open(File.join(TEST_CONFIG[:wwwroot], TEST_CONFIG[:content_is_array_file]), "a+") do |f|
|
|
204
199
|
f.write "<div>new! - new </div>"
|
|
205
200
|
end
|
|
@@ -207,46 +202,39 @@ class TestArraySites < BaseWebrickTest
|
|
|
207
202
|
c.url = url
|
|
208
203
|
a = TestAlerter.new()
|
|
209
204
|
c.alerters = [a]
|
|
210
|
-
# Second run don't
|
|
205
|
+
# Second run don't do anything because we shouldn't rerun
|
|
211
206
|
c.update(cache_dir: cache_dir, last_dir: last_dir)
|
|
212
207
|
expected_error = "INFO -- TestArraySites::TestArraySite: Too soon to update #{url}"
|
|
213
208
|
last_error = @logger_test_io.string.split("\n")[-1]
|
|
214
209
|
assert { last_error.end_with?(expected_error) }
|
|
215
|
-
assert { result == "" }
|
|
216
210
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
c.content.each { |x| x.delete('_timestamp') }
|
|
211
|
+
c.update_state_file({ "time" => Time.now.to_i - 300 })
|
|
220
212
|
|
|
221
|
-
c.every = 0
|
|
222
213
|
# This time we set new things, and wait is 0 so we are good to go
|
|
223
214
|
c.update(cache_dir: cache_dir, last_dir: last_dir)
|
|
224
215
|
expected_error = "DEBUG -- TestArraySites::TestArraySite: Alerting new stuff"
|
|
225
216
|
last_error = @logger_test_io.string.split("\n")[-1]
|
|
226
217
|
assert { last_error.end_with?(expected_error) }
|
|
218
|
+
|
|
227
219
|
expected_html = Site::HTML_HEADER.dup + [
|
|
228
220
|
"<ul style='list-style-type: none;'>",
|
|
229
221
|
"<li id='new!'><a href='new!'>new</a></li>",
|
|
230
222
|
"</ul>"
|
|
231
223
|
].join("\n")
|
|
232
224
|
|
|
233
|
-
c.
|
|
234
|
-
assert { c.
|
|
225
|
+
c.articles.each { |x| x.delete('_timestamp') }
|
|
226
|
+
assert { c.articles == [{ "id" => "new!", "url" => "new!", "title" => "new" }] }
|
|
235
227
|
assert { c.generate_html_content == expected_html }
|
|
236
|
-
expected_last = {
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
{ "id" => "new!", "title" => "new", "url" => "new!" }] }
|
|
228
|
+
expected_last = {
|
|
229
|
+
"wait_at_least" => 200,
|
|
230
|
+
"articles" => [{ "id" => "lol", "title" => "lilo", "url" => "lol" },
|
|
231
|
+
{ "id" => "fi", "title" => "fu", "url" => "fi" },
|
|
232
|
+
{ "id" => "new!", "title" => "new", "url" => "new!" }]
|
|
233
|
+
}
|
|
243
234
|
result_last = JSON.parse(File.read(c.state_file))
|
|
244
235
|
result_last.delete("time")
|
|
245
|
-
result_last["
|
|
246
|
-
|
|
247
|
-
end
|
|
248
|
-
result_last["previous_content"].each do |item|
|
|
249
|
-
item.delete("_timestamp")
|
|
236
|
+
result_last["articles"].each do |article|
|
|
237
|
+
article.delete("_timestamp")
|
|
250
238
|
end
|
|
251
239
|
assert { expected_last == result_last }
|
|
252
240
|
|
|
@@ -256,8 +244,9 @@ class TestArraySites < BaseWebrickTest
|
|
|
256
244
|
c.url = url
|
|
257
245
|
a = TestAlerter.new()
|
|
258
246
|
c.alerters = [a]
|
|
259
|
-
|
|
260
|
-
|
|
247
|
+
# Now, we don't call the alerters because we have no new things
|
|
248
|
+
c.state_file = File.join(last_dir, "last-localhost-35e711989b197f20f3d4936e91a2c079")
|
|
249
|
+
c.update_state_file({ "time" => Time.now.to_i - 300 })
|
|
261
250
|
c.update(cache_dir: cache_dir, last_dir: last_dir)
|
|
262
251
|
expected_error = "INFO -- TestArraySites::TestArraySite: Nothing new for #{url}"
|
|
263
252
|
last_error = @logger_test_io.string.split("\n")[-1]
|
|
@@ -267,5 +256,7 @@ class TestArraySites < BaseWebrickTest
|
|
|
267
256
|
"</ul>"
|
|
268
257
|
].join("\n")
|
|
269
258
|
assert { result == "" }
|
|
259
|
+
ensure
|
|
260
|
+
cleanup
|
|
270
261
|
end
|
|
271
262
|
end
|
data/webwatchr.gemspec
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: webwatchr
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.
|
|
4
|
+
version: 0.0.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Renzo
|
|
@@ -22,6 +22,7 @@ files:
|
|
|
22
22
|
- lib/sites/bandcamp.rb
|
|
23
23
|
- lib/sites/bsky.rb
|
|
24
24
|
- lib/sites/postch.rb
|
|
25
|
+
- lib/sites/postnl.rb
|
|
25
26
|
- lib/sites/songkick.rb
|
|
26
27
|
- lib/webwatchr.rb
|
|
27
28
|
- lib/webwatchr/alerting.rb
|