wcc 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/README.md +47 -0
  2. data/lib/wcc.rb +47 -46
  3. metadata +5 -4
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ web change checker
2
+ ==================
3
+
4
+ This is a simple ruby script to track changes of websites and get notified via mail on
5
+ change with configurable scope of addresses per website. All mails contain a unified diff
6
+ from old content to new content so minor changes produce only few lines of text even on large sites.
7
+
8
+ Note: wcc relies on native `diff` command to produce the unified diff shown in mails -
9
+ plans are to remove this dependency by using [something like this](https://github.com/samg/diffy) later...
10
+
11
+ Usage
12
+ -----
13
+
14
+ wcc is packaged as a gem named 'wcc' and provides its main script as `wcc` via the
15
+ command line. It can be invoked by hand or automatically via *cron* in a server environment.
16
+
17
+ For using wcc you need to specify some options:
18
+
19
+ * either via the command line (see `wcc -h`)
20
+ * or in a configuration file in [YAML](https://secure.wikimedia.org/wikipedia/en/wiki/YAML) format
21
+
22
+ The location of the configuration file (usually called 'conf.yml' or something like this)
23
+ can itself be given on the command line as the last argument. Each option has a hard-coded default
24
+ (e.g. the configuration file name is assumed to be './conf.yml'). Command line options
25
+ overwrite configuration file entries.
26
+
27
+ The core option is the From: mail address and the SMTP configuration for sending emails.
28
+ It is highly encouraged to use the configuration file for all rarely changing things
29
+ (especially because you have to specify the list of tracked sites there anyway).
30
+
31
+ An example crontab entry that runs wcc every 10 minutes might look like this:
32
+
33
+ */10 * * * * root cd /path/to/dir/with/conf;./wcc
34
+
35
+ By default wcc only outputs ERROR messages to avoid your cron daemon spammin' around.
36
+ It is recommended to place 'conf.yml' (and optionally the 'filter.d') within a separate
37
+ directory and use `cd` in cron entry.
38
+
39
+ Setup
40
+ -----
41
+
42
+ You need Ruby (preferably version 1.8.7) and Rubygems installed
43
+ (consider using [rvm](http://beginrescueend.com/)). Install wcc:
44
+
45
+ gem install wcc
46
+
47
+ (If you *don't* use [rvm](http://beginrescueend.com/) you should add a 'sudo'.)
data/lib/wcc.rb CHANGED
@@ -17,9 +17,11 @@ require 'yaml'
17
17
  require 'htmlentities'
18
18
 
19
19
  class String
20
+ # Remove all HTML <tags> with at least one character name and
21
+ # decode all HTML entities into utf-8 characters.
22
+ #
23
+ # @return [String] stripped string
20
24
  def strip_html
21
- # remove all HTML <tags> with at least 1 character name
22
- # and decode all HTML entities into UTF-8 characters
23
25
  HTMLEntities.new.decode(self.gsub(/<[^>]+>/, ' '))
24
26
  end
25
27
  end
@@ -69,18 +71,19 @@ module WCC
69
71
  @options = {}
70
72
 
71
73
  OptionParser.new do |opts|
72
- opts.banner = "Usage: ruby wcc.rb [options] [config-yaml-file]"
74
+ opts.banner = "Usage: ruby wcc.rb [options] [config-yaml-file]"
75
+ opts.banner += "\nOptions:\n"
73
76
  opts.on('-v', '--verbose', 'Output more information') do self[:verbose] = true end
74
77
  opts.on('-d', '--debug', 'Enable debug mode') do self[:debug] = true end
75
- opts.on('-o', '--dir DIR', 'Save required files to DIR') do |dir| self[:dir] = dir end
76
- opts.on('-s', '--simulate', 'Check for update but does not save any data') do self[:simulate] = true end
77
- opts.on('-c', '--clean', 'Removes all hash and diff files') do self[:clean] = true end
78
- opts.on('-t', '--tag TAG', 'Sets TAG used in output') do |t| self[:tag] = t end
79
- opts.on('-n', '--no-mails', 'Does not send any emails') do self[:nomails] = true end
80
- opts.on('-f', '--from MAIL', 'Set sender mail address') do |m| self[:from_mail] = m end
81
- opts.on('--host HOST', 'Sets SMTP host') do |h| self[:host] = h end
82
- opts.on('--port PORT', 'Sets SMTP port') do |p| self[:port] = p end
83
- opts.on('--show-config', 'Show config after loading config file.') do self[:show_config] = true end
78
+ opts.on('-o', '--dir DIR', 'Save hash and diff files to DIR') do |dir| self[:dir] = dir end
79
+ opts.on('-s', '--simulate', 'Check for update but do not save hash or diff files') do self[:simulate] = true end
80
+ opts.on('-c', '--clean', 'Remove all saved hash and diff files') do self[:clean] = true end
81
+ opts.on('-t', '--tag TAG', 'Set TAG used in output') do |t| self[:tag] = t end
82
+ opts.on('-n', '--no-mails', 'Do not send any emails') do self[:nomails] = true end
83
+ opts.on('-f', '--from MAIL', 'Set From: mail address') do |m| self[:from_mail] = m end
84
+ opts.on('--host HOST', 'Set SMTP host') do |h| self[:host] = h end
85
+ opts.on('--port PORT', 'Set SMTP port') do |p| self[:port] = p end
86
+ opts.on('--show-config', 'Show config after loading config file (debug purposes)') do self[:show_config] = true end
84
87
  opts.on('-h', '-?', '--help', 'Display this screen') do
85
88
  puts opts
86
89
  exit
@@ -94,7 +97,7 @@ module WCC
94
97
  WCC.logger.level = Logger::INFO if self[:verbose]
95
98
  WCC.logger.level = Logger::DEBUG if self[:debug]
96
99
 
97
- WCC.logger.formatter = MyFormatter.new((self[:verbose] or self[:debug]))
100
+ WCC.logger.formatter = LogFormatter.new((self[:verbose] or self[:debug]))
98
101
 
99
102
  # main
100
103
  WCC.logger.info "No config file given, using default 'conf.yml' file" if ARGV.length == 0
@@ -187,7 +190,7 @@ module WCC
187
190
  filterrefs,
188
191
  yaml_site['auth'] || {},
189
192
  cookie)
190
- end if yaml
193
+ end if not yaml.nil?
191
194
 
192
195
  WCC.logger.debug @sites.length.to_s + (@sites.length == 1 ? ' site' : ' sites') + " loaded\n" +
193
196
  @sites.map { |s| " #{s.uri.host.to_s}\n url: #{s.uri.to_s}\n id: #{s.id}" }.join("\n")
@@ -274,6 +277,25 @@ module WCC
274
277
  @content = content
275
278
  File.open(Conf.file(@id + '.site'), 'w') { |f| f.write(@content) } unless Conf.simulate?
276
279
  end
280
+
281
+ def fetch
282
+ http = Net::HTTP.new(@uri.host, @uri.port)
283
+ if @uri.is_a?(URI::HTTPS)
284
+ http.use_ssl = true
285
+ http.verify_mode = OpenSSL::SSL::VERIFY_NONE
286
+ end
287
+ http.start do |http|
288
+ req = Net::HTTP::Get.new(@uri.request_uri)
289
+ if @auth['type'] == 'basic'
290
+ WCC.logger.debug "Doing basic auth"
291
+ req.basic_auth(@auth['username'], @auth['password'])
292
+ end
293
+ if not @cookie.nil?
294
+ req.add_field("Cookie", @cookie)
295
+ end
296
+ http.request(req)
297
+ end
298
+ end
277
299
  end
278
300
 
279
301
  class MailAddress
@@ -353,17 +375,14 @@ module WCC
353
375
  return true if filters.nil?
354
376
 
355
377
  WCC.logger.info "Testing with filters: #{filters.join(', ')}"
356
-
357
- filters.each do |filterref|
358
- block = @@filters[filterref.id]
359
-
378
+ filters.each do |fref|
379
+ block = @@filters[fref.id]
360
380
  if block.nil?
361
- WCC.logger.error "Requested filter '#{filterref.id}' not found, skipping it."
381
+ WCC.logger.error "Requested filter '#{fref.id}' not found, skipping it."
362
382
  next
363
383
  end
364
-
365
- if not block.call(data, filterref.arguments)
366
- WCC.logger.info "Filter #{filterref.id} failed!"
384
+ if not block.call(data, fref.arguments)
385
+ WCC.logger.info "Filter #{fref.id} failed!"
367
386
  return false
368
387
  end
369
388
  end
@@ -371,7 +390,7 @@ module WCC
371
390
  end
372
391
  end
373
392
 
374
- class MyFormatter
393
+ class LogFormatter
375
394
  def initialize(use_color = true)
376
395
  @color = use_color
377
396
  end
@@ -398,32 +417,14 @@ module WCC
398
417
  end
399
418
 
400
419
  class Prog
401
-
402
- # TODO: move to Site
403
- def self.fetch(site)
404
- http = Net::HTTP.new(site.uri.host, site.uri.port)
405
- if site.uri.is_a?(URI::HTTPS)
406
- http.use_ssl = true
407
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
408
- end
409
- http.start do |http|
410
- req = Net::HTTP::Get.new(site.uri.request_uri)
411
- if site.auth['type'] == 'basic'
412
- WCC.logger.debug "Doing basic auth"
413
- req.basic_auth(site.auth['username'], site.auth['password'])
414
- end
415
- if not site.cookie.nil?
416
- req.add_field("Cookie", site.cookie)
417
- end
418
- http.request(req)
419
- end
420
- end
421
-
422
420
  def self.checkForUpdate(site)
423
421
  WCC.logger.info "Requesting '#{site.uri.to_s}'"
424
422
  begin
425
- res = fetch(site)
426
- rescue StandardError, Timeout::Error => ex
423
+ res = site.fetch
424
+ rescue Timeout::Error => ex
425
+ # don't claim on this
426
+ return false
427
+ rescue => ex
427
428
  WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
428
429
  return false
429
430
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wcc
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Christian Nicolai
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-09-30 00:00:00 Z
18
+ date: 2011-10-01 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: htmlentities
@@ -42,6 +42,7 @@ extra_rdoc_files: []
42
42
  files:
43
43
  - bin/wcc
44
44
  - lib/wcc.rb
45
+ - README.md
45
46
  homepage: https://github.com/cmur2/wcc
46
47
  licenses: []
47
48