wcc 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +47 -0
- data/lib/wcc.rb +47 -46
- metadata +5 -4
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
web change checker
|
2
|
+
==================
|
3
|
+
|
4
|
+
This is a simple ruby script to track changes of websites and get notified via mail on
|
5
|
+
change with configurable scope of addresses per website. All mails contain a unified diff
|
6
|
+
from old content to new content so minor changes produce only few lines of text even on large sites.
|
7
|
+
|
8
|
+
Note: wcc relies on the native `diff` command to produce the unified diff shown in mails -
|
9
|
+
plans are to remove this dependency by using [something like this](https://github.com/samg/diffy) later...
|
10
|
+
|
11
|
+
Usage
|
12
|
+
-----
|
13
|
+
|
14
|
+
wcc is packaged as a gem named 'wcc' and provides its main script as `wcc` via the
|
15
|
+
command line. It can be invoked by hand or automatically via *cron* in a server environment.
|
16
|
+
|
17
|
+
For using wcc you need to specify some options:
|
18
|
+
|
19
|
+
* either via the command line (see `wcc -h`)
|
20
|
+
* or in a configuration file in [YAML](https://secure.wikimedia.org/wikipedia/en/wiki/YAML) format
|
21
|
+
|
22
|
+
The location of the configuration file (usually called 'conf.yml' or something like this)
|
23
|
+
can itself be given on command line as last argument. Each option has a hard-coded default
|
24
|
+
(e.g. the configuration file name is assumed to be './conf.yml'). Command line options
|
25
|
+
overwrite configuration file entries.
|
26
|
+
|
27
|
+
The core option is the From: mail address and the SMTP configuration for sending emails.
|
28
|
+
It is highly encouraged to use the configuration file for all rarely changing things
|
29
|
+
(even because you have to specify the list of tracked sites there anyways).
|
30
|
+
|
31
|
+
An example crontab entry that runs wcc every 10 minutes might look like this:
|
32
|
+
|
33
|
+
*/10 * * * * root cd /path/to/dir/with/conf;./wcc
|
34
|
+
|
35
|
+
By default wcc only outputs ERROR messages to avoid your cron daemon spammin' around.
|
36
|
+
It is recommended to place 'conf.yml' (and optionally the 'filter.d') within a separate
|
37
|
+
directory and use `cd` in cron entry.
|
38
|
+
|
39
|
+
Setup
|
40
|
+
-----
|
41
|
+
|
42
|
+
You need Ruby (preferably version 1.8.7) and Rubygems installed
|
43
|
+
(consider using [rvm](http://beginrescueend.com/)). Install wcc:
|
44
|
+
|
45
|
+
gem install wcc
|
46
|
+
|
47
|
+
(If you *don't* use [rvm](http://beginrescueend.com/) you should add a 'sudo'.)
|
data/lib/wcc.rb
CHANGED
@@ -17,9 +17,11 @@ require 'yaml'
|
|
17
17
|
require 'htmlentities'
|
18
18
|
|
19
19
|
class String
|
20
|
+
# Remove all HTML <tags> with at least one character name and
|
21
|
+
# decode all HTML entities into utf-8 characters.
|
22
|
+
#
|
23
|
+
# @return [String] stripped string
|
20
24
|
def strip_html
|
21
|
-
# remove all HTML <tags> with at least 1 character name
|
22
|
-
# and decode all HTML entities into UTF-8 characters
|
23
25
|
HTMLEntities.new.decode(self.gsub(/<[^>]+>/, ' '))
|
24
26
|
end
|
25
27
|
end
|
@@ -69,18 +71,19 @@ module WCC
|
|
69
71
|
@options = {}
|
70
72
|
|
71
73
|
OptionParser.new do |opts|
|
72
|
-
opts.banner =
|
74
|
+
opts.banner = "Usage: ruby wcc.rb [options] [config-yaml-file]"
|
75
|
+
opts.banner += "\nOptions:\n"
|
73
76
|
opts.on('-v', '--verbose', 'Output more information') do self[:verbose] = true end
|
74
77
|
opts.on('-d', '--debug', 'Enable debug mode') do self[:debug] = true end
|
75
|
-
opts.on('-o', '--dir DIR', 'Save
|
76
|
-
opts.on('-s', '--simulate', 'Check for update but
|
77
|
-
opts.on('-c', '--clean', '
|
78
|
-
opts.on('-t', '--tag TAG', '
|
79
|
-
opts.on('-n', '--no-mails', '
|
80
|
-
opts.on('-f', '--from MAIL', 'Set
|
81
|
-
opts.on('--host HOST', '
|
82
|
-
opts.on('--port PORT', '
|
83
|
-
opts.on('--show-config', 'Show config after loading config file
|
78
|
+
opts.on('-o', '--dir DIR', 'Save hash and diff files to DIR') do |dir| self[:dir] = dir end
|
79
|
+
opts.on('-s', '--simulate', 'Check for update but do not save hash or diff files') do self[:simulate] = true end
|
80
|
+
opts.on('-c', '--clean', 'Remove all saved hash and diff files') do self[:clean] = true end
|
81
|
+
opts.on('-t', '--tag TAG', 'Set TAG used in output') do |t| self[:tag] = t end
|
82
|
+
opts.on('-n', '--no-mails', 'Do not send any emails') do self[:nomails] = true end
|
83
|
+
opts.on('-f', '--from MAIL', 'Set From: mail address') do |m| self[:from_mail] = m end
|
84
|
+
opts.on('--host HOST', 'Set SMTP host') do |h| self[:host] = h end
|
85
|
+
opts.on('--port PORT', 'Set SMTP port') do |p| self[:port] = p end
|
86
|
+
opts.on('--show-config', 'Show config after loading config file (debug purposes)') do self[:show_config] = true end
|
84
87
|
opts.on('-h', '-?', '--help', 'Display this screen') do
|
85
88
|
puts opts
|
86
89
|
exit
|
@@ -94,7 +97,7 @@ module WCC
|
|
94
97
|
WCC.logger.level = Logger::INFO if self[:verbose]
|
95
98
|
WCC.logger.level = Logger::DEBUG if self[:debug]
|
96
99
|
|
97
|
-
WCC.logger.formatter =
|
100
|
+
WCC.logger.formatter = LogFormatter.new((self[:verbose] or self[:debug]))
|
98
101
|
|
99
102
|
# main
|
100
103
|
WCC.logger.info "No config file given, using default 'conf.yml' file" if ARGV.length == 0
|
@@ -187,7 +190,7 @@ module WCC
|
|
187
190
|
filterrefs,
|
188
191
|
yaml_site['auth'] || {},
|
189
192
|
cookie)
|
190
|
-
end if yaml
|
193
|
+
end if not yaml.nil?
|
191
194
|
|
192
195
|
WCC.logger.debug @sites.length.to_s + (@sites.length == 1 ? ' site' : ' sites') + " loaded\n" +
|
193
196
|
@sites.map { |s| " #{s.uri.host.to_s}\n url: #{s.uri.to_s}\n id: #{s.id}" }.join("\n")
|
@@ -274,6 +277,25 @@ module WCC
|
|
274
277
|
@content = content
|
275
278
|
File.open(Conf.file(@id + '.site'), 'w') { |f| f.write(@content) } unless Conf.simulate?
|
276
279
|
end
|
280
|
+
|
281
|
+
def fetch
|
282
|
+
http = Net::HTTP.new(@uri.host, @uri.port)
|
283
|
+
if @uri.is_a?(URI::HTTPS)
|
284
|
+
http.use_ssl = true
|
285
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
286
|
+
end
|
287
|
+
http.start do |http|
|
288
|
+
req = Net::HTTP::Get.new(@uri.request_uri)
|
289
|
+
if @auth['type'] == 'basic'
|
290
|
+
WCC.logger.debug "Doing basic auth"
|
291
|
+
req.basic_auth(@auth['username'], @auth['password'])
|
292
|
+
end
|
293
|
+
if not @cookie.nil?
|
294
|
+
req.add_field("Cookie", @cookie)
|
295
|
+
end
|
296
|
+
http.request(req)
|
297
|
+
end
|
298
|
+
end
|
277
299
|
end
|
278
300
|
|
279
301
|
class MailAddress
|
@@ -353,17 +375,14 @@ module WCC
|
|
353
375
|
return true if filters.nil?
|
354
376
|
|
355
377
|
WCC.logger.info "Testing with filters: #{filters.join(', ')}"
|
356
|
-
|
357
|
-
|
358
|
-
block = @@filters[filterref.id]
|
359
|
-
|
378
|
+
filters.each do |fref|
|
379
|
+
block = @@filters[fref.id]
|
360
380
|
if block.nil?
|
361
|
-
WCC.logger.error "Requested filter '#{
|
381
|
+
WCC.logger.error "Requested filter '#{fref.id}' not found, skipping it."
|
362
382
|
next
|
363
383
|
end
|
364
|
-
|
365
|
-
|
366
|
-
WCC.logger.info "Filter #{filterref.id} failed!"
|
384
|
+
if not block.call(data, fref.arguments)
|
385
|
+
WCC.logger.info "Filter #{fref.id} failed!"
|
367
386
|
return false
|
368
387
|
end
|
369
388
|
end
|
@@ -371,7 +390,7 @@ module WCC
|
|
371
390
|
end
|
372
391
|
end
|
373
392
|
|
374
|
-
class
|
393
|
+
class LogFormatter
|
375
394
|
def initialize(use_color = true)
|
376
395
|
@color = use_color
|
377
396
|
end
|
@@ -398,32 +417,14 @@ module WCC
|
|
398
417
|
end
|
399
418
|
|
400
419
|
class Prog
|
401
|
-
|
402
|
-
# TODO: move to Site
|
403
|
-
def self.fetch(site)
|
404
|
-
http = Net::HTTP.new(site.uri.host, site.uri.port)
|
405
|
-
if site.uri.is_a?(URI::HTTPS)
|
406
|
-
http.use_ssl = true
|
407
|
-
http.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
408
|
-
end
|
409
|
-
http.start do |http|
|
410
|
-
req = Net::HTTP::Get.new(site.uri.request_uri)
|
411
|
-
if site.auth['type'] == 'basic'
|
412
|
-
WCC.logger.debug "Doing basic auth"
|
413
|
-
req.basic_auth(site.auth['username'], site.auth['password'])
|
414
|
-
end
|
415
|
-
if not site.cookie.nil?
|
416
|
-
req.add_field("Cookie", site.cookie)
|
417
|
-
end
|
418
|
-
http.request(req)
|
419
|
-
end
|
420
|
-
end
|
421
|
-
|
422
420
|
def self.checkForUpdate(site)
|
423
421
|
WCC.logger.info "Requesting '#{site.uri.to_s}'"
|
424
422
|
begin
|
425
|
-
res = fetch
|
426
|
-
rescue
|
423
|
+
res = site.fetch
|
424
|
+
rescue Timeout::Error => ex
|
425
|
+
# don't claim on this
|
426
|
+
return false
|
427
|
+
rescue => ex
|
427
428
|
WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
|
428
429
|
return false
|
429
430
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wcc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 25
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Christian Nicolai
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-10-01 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: htmlentities
|
@@ -42,6 +42,7 @@ extra_rdoc_files: []
|
|
42
42
|
files:
|
43
43
|
- bin/wcc
|
44
44
|
- lib/wcc.rb
|
45
|
+
- README.md
|
45
46
|
homepage: https://github.com/cmur2/wcc
|
46
47
|
licenses: []
|
47
48
|
|