wcc 0.0.1 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +47 -0
- data/lib/wcc.rb +47 -46
- metadata +5 -4
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
web change checker
|
2
|
+
==================
|
3
|
+
|
4
|
+
This is a simple ruby script to track changes of websites and get notified via mail on
|
5
|
+
change with configurable scope of addresses per website. All mails contain a unified diff
|
6
|
+
from old content to new content so minor changes produce only a few lines of text even on large sites.
|
7
|
+
|
8
|
+
Note: wcc relies on the native `diff` command to produce the unified diff shown in mails -
|
9
|
+
plans are to remove this dependency by using [something like this](https://github.com/samg/diffy) later...
|
10
|
+
|
11
|
+
Usage
|
12
|
+
-----
|
13
|
+
|
14
|
+
wcc is packaged as a gem named 'wcc' and provides its main script as `wcc` via the
|
15
|
+
command line. It can be invoked by hand or automatically via *cron* in a server environment.
|
16
|
+
|
17
|
+
For using wcc you need to specify some options:
|
18
|
+
|
19
|
+
* either via the command line (see `wcc -h`)
|
20
|
+
* or in a configuration file in [YAML](https://secure.wikimedia.org/wikipedia/en/wiki/YAML) format
|
21
|
+
|
22
|
+
The location of the configuration file (usually called 'conf.yml' or something like this)
|
23
|
+
can itself be given on the command line as last argument. Each option has a hard-coded default
|
24
|
+
(e.g. the configuration file name is assumed to be './conf.yml'). Command line options
|
25
|
+
overwrite configuration file entries.
|
26
|
+
|
27
|
+
The core options are the From: mail address and the SMTP configuration for sending emails.
|
28
|
+
It is highly encouraged to use the configuration file for all rarely changing things
|
29
|
+
(especially since you have to specify the list of tracked sites there anyway).
|
30
|
+
|
31
|
+
An example crontab entry that runs wcc every 10 minutes might look like this:
|
32
|
+
|
33
|
+
*/10 * * * * root cd /path/to/dir/with/conf;./wcc
|
34
|
+
|
35
|
+
By default wcc only outputs ERROR messages to avoid your cron daemon spammin' around.
|
36
|
+
It is recommended to place 'conf.yml' (and optionally the 'filter.d') within a separate
|
37
|
+
directory and use `cd` in the cron entry.
|
38
|
+
|
39
|
+
Setup
|
40
|
+
-----
|
41
|
+
|
42
|
+
You need Ruby (preferably version 1.8.7) and Rubygems installed
|
43
|
+
(consider using [rvm](http://beginrescueend.com/)). Install wcc:
|
44
|
+
|
45
|
+
gem install wcc
|
46
|
+
|
47
|
+
(If you *don't* use [rvm](http://beginrescueend.com/) you should add a 'sudo'.)
|
data/lib/wcc.rb
CHANGED
@@ -17,9 +17,11 @@ require 'yaml'
|
|
17
17
|
require 'htmlentities'
|
18
18
|
|
19
19
|
class String
|
20
|
+
# Remove all HTML <tags> with at least one character name and
|
21
|
+
# decode all HTML entities into utf-8 characters.
|
22
|
+
#
|
23
|
+
# @return [String] stripped string
|
20
24
|
def strip_html
|
21
|
-
# remove all HTML <tags> with at least 1 character name
|
22
|
-
# and decode all HTML entities into UTF-8 characters
|
23
25
|
HTMLEntities.new.decode(self.gsub(/<[^>]+>/, ' '))
|
24
26
|
end
|
25
27
|
end
|
@@ -69,18 +71,19 @@ module WCC
|
|
69
71
|
@options = {}
|
70
72
|
|
71
73
|
OptionParser.new do |opts|
|
72
|
-
opts.banner =
|
74
|
+
opts.banner = "Usage: ruby wcc.rb [options] [config-yaml-file]"
|
75
|
+
opts.banner += "\nOptions:\n"
|
73
76
|
opts.on('-v', '--verbose', 'Output more information') do self[:verbose] = true end
|
74
77
|
opts.on('-d', '--debug', 'Enable debug mode') do self[:debug] = true end
|
75
|
-
opts.on('-o', '--dir DIR', 'Save
|
76
|
-
opts.on('-s', '--simulate', 'Check for update but
|
77
|
-
opts.on('-c', '--clean', '
|
78
|
-
opts.on('-t', '--tag TAG', '
|
79
|
-
opts.on('-n', '--no-mails', '
|
80
|
-
opts.on('-f', '--from MAIL', 'Set
|
81
|
-
opts.on('--host HOST', '
|
82
|
-
opts.on('--port PORT', '
|
83
|
-
opts.on('--show-config', 'Show config after loading config file
|
78
|
+
opts.on('-o', '--dir DIR', 'Save hash and diff files to DIR') do |dir| self[:dir] = dir end
|
79
|
+
opts.on('-s', '--simulate', 'Check for update but do not save hash or diff files') do self[:simulate] = true end
|
80
|
+
opts.on('-c', '--clean', 'Remove all saved hash and diff files') do self[:clean] = true end
|
81
|
+
opts.on('-t', '--tag TAG', 'Set TAG used in output') do |t| self[:tag] = t end
|
82
|
+
opts.on('-n', '--no-mails', 'Do not send any emails') do self[:nomails] = true end
|
83
|
+
opts.on('-f', '--from MAIL', 'Set From: mail address') do |m| self[:from_mail] = m end
|
84
|
+
opts.on('--host HOST', 'Set SMTP host') do |h| self[:host] = h end
|
85
|
+
opts.on('--port PORT', 'Set SMTP port') do |p| self[:port] = p end
|
86
|
+
opts.on('--show-config', 'Show config after loading config file (debug purposes)') do self[:show_config] = true end
|
84
87
|
opts.on('-h', '-?', '--help', 'Display this screen') do
|
85
88
|
puts opts
|
86
89
|
exit
|
@@ -94,7 +97,7 @@ module WCC
|
|
94
97
|
WCC.logger.level = Logger::INFO if self[:verbose]
|
95
98
|
WCC.logger.level = Logger::DEBUG if self[:debug]
|
96
99
|
|
97
|
-
WCC.logger.formatter =
|
100
|
+
WCC.logger.formatter = LogFormatter.new((self[:verbose] or self[:debug]))
|
98
101
|
|
99
102
|
# main
|
100
103
|
WCC.logger.info "No config file given, using default 'conf.yml' file" if ARGV.length == 0
|
@@ -187,7 +190,7 @@ module WCC
|
|
187
190
|
filterrefs,
|
188
191
|
yaml_site['auth'] || {},
|
189
192
|
cookie)
|
190
|
-
end if yaml
|
193
|
+
end if not yaml.nil?
|
191
194
|
|
192
195
|
WCC.logger.debug @sites.length.to_s + (@sites.length == 1 ? ' site' : ' sites') + " loaded\n" +
|
193
196
|
@sites.map { |s| " #{s.uri.host.to_s}\n url: #{s.uri.to_s}\n id: #{s.id}" }.join("\n")
|
@@ -274,6 +277,25 @@ module WCC
|
|
274
277
|
@content = content
|
275
278
|
File.open(Conf.file(@id + '.site'), 'w') { |f| f.write(@content) } unless Conf.simulate?
|
276
279
|
end
|
280
|
+
|
281
|
+
def fetch
|
282
|
+
http = Net::HTTP.new(@uri.host, @uri.port)
|
283
|
+
if @uri.is_a?(URI::HTTPS)
|
284
|
+
http.use_ssl = true
|
285
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
286
|
+
end
|
287
|
+
http.start do |http|
|
288
|
+
req = Net::HTTP::Get.new(@uri.request_uri)
|
289
|
+
if @auth['type'] == 'basic'
|
290
|
+
WCC.logger.debug "Doing basic auth"
|
291
|
+
req.basic_auth(@auth['username'], @auth['password'])
|
292
|
+
end
|
293
|
+
if not @cookie.nil?
|
294
|
+
req.add_field("Cookie", @cookie)
|
295
|
+
end
|
296
|
+
http.request(req)
|
297
|
+
end
|
298
|
+
end
|
277
299
|
end
|
278
300
|
|
279
301
|
class MailAddress
|
@@ -353,17 +375,14 @@ module WCC
|
|
353
375
|
return true if filters.nil?
|
354
376
|
|
355
377
|
WCC.logger.info "Testing with filters: #{filters.join(', ')}"
|
356
|
-
|
357
|
-
|
358
|
-
block = @@filters[filterref.id]
|
359
|
-
|
378
|
+
filters.each do |fref|
|
379
|
+
block = @@filters[fref.id]
|
360
380
|
if block.nil?
|
361
|
-
WCC.logger.error "Requested filter '#{
|
381
|
+
WCC.logger.error "Requested filter '#{fref.id}' not found, skipping it."
|
362
382
|
next
|
363
383
|
end
|
364
|
-
|
365
|
-
|
366
|
-
WCC.logger.info "Filter #{filterref.id} failed!"
|
384
|
+
if not block.call(data, fref.arguments)
|
385
|
+
WCC.logger.info "Filter #{fref.id} failed!"
|
367
386
|
return false
|
368
387
|
end
|
369
388
|
end
|
@@ -371,7 +390,7 @@ module WCC
|
|
371
390
|
end
|
372
391
|
end
|
373
392
|
|
374
|
-
class
|
393
|
+
class LogFormatter
|
375
394
|
def initialize(use_color = true)
|
376
395
|
@color = use_color
|
377
396
|
end
|
@@ -398,32 +417,14 @@ module WCC
|
|
398
417
|
end
|
399
418
|
|
400
419
|
class Prog
|
401
|
-
|
402
|
-
# TODO: move to Site
|
403
|
-
def self.fetch(site)
|
404
|
-
http = Net::HTTP.new(site.uri.host, site.uri.port)
|
405
|
-
if site.uri.is_a?(URI::HTTPS)
|
406
|
-
http.use_ssl = true
|
407
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
408
|
-
end
|
409
|
-
http.start do |http|
|
410
|
-
req = Net::HTTP::Get.new(site.uri.request_uri)
|
411
|
-
if site.auth['type'] == 'basic'
|
412
|
-
WCC.logger.debug "Doing basic auth"
|
413
|
-
req.basic_auth(site.auth['username'], site.auth['password'])
|
414
|
-
end
|
415
|
-
if not site.cookie.nil?
|
416
|
-
req.add_field("Cookie", site.cookie)
|
417
|
-
end
|
418
|
-
http.request(req)
|
419
|
-
end
|
420
|
-
end
|
421
|
-
|
422
420
|
def self.checkForUpdate(site)
|
423
421
|
WCC.logger.info "Requesting '#{site.uri.to_s}'"
|
424
422
|
begin
|
425
|
-
res = fetch
|
426
|
-
rescue
|
423
|
+
res = site.fetch
|
424
|
+
rescue Timeout::Error => ex
|
425
|
+
# don't claim on this
|
426
|
+
return false
|
427
|
+
rescue => ex
|
427
428
|
WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
|
428
429
|
return false
|
429
430
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wcc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 25
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Christian Nicolai
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-10-01 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: htmlentities
|
@@ -42,6 +42,7 @@ extra_rdoc_files: []
|
|
42
42
|
files:
|
43
43
|
- bin/wcc
|
44
44
|
- lib/wcc.rb
|
45
|
+
- README.md
|
45
46
|
homepage: https://github.com/cmur2/wcc
|
46
47
|
licenses: []
|
47
48
|
|