wcc 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4):
  1. data/assets/conf.yml +1 -0
  2. data/lib/wcc.rb +214 -167
  3. data/lib/wcc/version.rb +1 -1
  4. metadata +4 -4
@@ -6,6 +6,7 @@ conf:
6
6
  # tag: wcc
7
7
  # filterd: ./filter.d
8
8
  # templated: ./template.d
9
+ # stats: yes
9
10
  # # default config assuming local mail server
10
11
  # email:
11
12
  # smtp:
data/lib/wcc.rb CHANGED
@@ -100,6 +100,7 @@ module WCC
100
100
  :simulate => false,
101
101
  :clean => false,
102
102
  :nomails => false,
103
+ :stats => false,
103
104
  # when you want to use ./tmp it must be writeable
104
105
  :cache_dir => '/var/tmp/wcc',
105
106
  :tag => 'wcc',
@@ -175,6 +176,8 @@ module WCC
175
176
  @options[:filter_dir] ||= val
176
177
  when 'templated'
177
178
  @options[:template_dir] ||= val
179
+ when 'stats'
180
+ @options[:stats] ||= val
178
181
  else
179
182
  if not Notificators.mappings.include?(key)
180
183
  plugin_name = "wcc-#{key}-notificator"
@@ -341,166 +344,23 @@ module WCC
341
344
  end
342
345
 
343
346
  class Prog
344
- def self.checkForUpdate(site)
345
- WCC.logger.info "Requesting '#{site.uri.to_s}'"
346
- begin
347
- res = site.fetch
348
- rescue Timeout::Error => ex
349
- # don't claim on this
350
- return false
351
- rescue => ex
352
- WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
353
- return false
354
- end
355
- if res.kind_of?(Net::HTTPOK)
356
- # be happy!
357
- elsif res.kind_of?(Net::HTTPMovedPermanently)
358
- loc = res['Location']
359
- if loc.nil?
360
- WCC.logger.error "Site #{site.uri.to_s} moved permanently, skippong it - no new location given."
361
- else
362
- WCC.logger.error "Site #{site.uri.to_s} moved permanently to '#{loc}', skipping it - please update your conf.yml adequately!"
363
- end
364
- return false
365
- elsif res.kind_of?(Net::HTTPSeeOther) or res.kind_of?(Net::HTTPTemporaryRedirect)
366
- loc = URI.parse(res['Location'])
367
- WCC.logger.warn "Redirect: requesting '#{loc.to_s}'"
368
- res = site.fetch_redirect(loc)
369
- if not res.kind_of?(Net::HTTPOK)
370
- WCC.logger.error "Redirected site #{loc.to_s} returned #{res.code} code, skipping it."
371
- WCC.logger.error "Headers: #{res.to_hash.inspect}"
372
- return false
373
- end
374
- elsif res.kind_of?(Net::HTTPUnauthorized)
375
- WCC.logger.error "Site #{site.uri.to_s} demands authentication for '#{res['www-authenticate']}', skipping it - consider using 'auth:' option in your conf.yml."
376
- return false
377
- elsif res.kind_of?(Net::HTTPNotFound)
378
- WCC.logger.error "Site #{site.uri.to_s} not found, skipping it."
379
- return false
380
- elsif res.kind_of?(Net::HTTPForbidden)
381
- WCC.logger.error "Site #{site.uri.to_s} forbids access, skipping it."
382
- return false
383
- elsif res.kind_of?(Net::HTTPInternalServerError)
384
- WCC.logger.error "Site #{site.uri.to_s} has internal errors, skipping it."
385
- return false
386
- elsif res.kind_of?(Net::HTTPServiceUnavailable)
387
- #retry_after = res['Retry-After']
388
- WCC.logger.warn "Site #{site.uri.to_s} currently not available, skipping it."
389
- return false
390
- else
391
- WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
392
- WCC.logger.error "Headers: #{res.to_hash.inspect}"
393
- return false
394
- end
395
-
396
- new_content = res.body
397
-
398
- # detect encoding from http header, meta element, default utf-8
399
- # do not use utf-8 regex because it will fail on non utf-8 pages
400
- encoding = (res['content-type'].to_s.match(/;\s*charset=([A-Za-z0-9-]*)/i).to_a[1] ||
401
- new_content.match(/<meta.*charset=([a-zA-Z0-9-]*).*/i).to_a[1]).to_s.downcase || 'utf-8'
402
-
403
- WCC.logger.info "Encoding is '#{encoding}'"
404
-
405
- # convert to utf-8
406
- begin
407
- new_content = Iconv.conv('utf-8', encoding, new_content)
408
- rescue => ex
409
- WCC.logger.error "Cannot convert site from '#{encoding}': #{ex.to_s}"
410
- return false
411
- end
412
-
413
- # strip html
414
- new_content = new_content.strip_html if site.strip_html?
415
- new_hash = Digest::MD5.hexdigest(new_content)
416
-
417
- WCC.logger.debug "Compare hashes\n old: #{site.hash.to_s}\n new: #{new_hash.to_s}"
418
- return false if new_hash == site.hash
419
-
420
- # do not try diff or anything if site was never checked before
421
- if site.new?
422
- site.hash, site.content = new_hash, new_content
423
-
424
- # signal that no diff was posible
425
- diff = nil
426
- else
427
- # save old site to tmp file
428
- old_site_file = Tempfile.new("wcc-#{site.id}-")
429
- old_site_file.write(site.content)
430
- old_site_file.close
431
-
432
- # calculate labels before updating
433
- old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + ".md5")).strftime(DIFF_TIME_FMT)
434
- new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)
435
-
436
- site.hash, site.content = new_hash, new_content
437
-
438
- # diff between OLD and NEW
439
- diff = %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" #{old_site_file.path} #{Conf.file(site.id + '.site')}]
440
- end
441
-
442
- # construct the data made available to filters and templates
443
- data = OpenStruct.new
444
- data.site = site
445
- data.diff = diff.nil? ? nil : WCC::Differ.new(diff)
446
- data.tag = Conf[:tag]
447
-
448
- # HACK: there *was* an update but no notification is required
449
- return false if not Filters.accept(data, site.filters)
450
-
451
- site.notify.each do |name|
452
- rec = Conf.recipients[name]
453
- if rec.nil?
454
- WCC.logger.error "Could not notify recipient #{name} - not found!"
455
- else
456
- rec.each { |way| way.notify!(data) }
457
- end
458
- end
459
-
460
- true
461
- end
462
-
463
- # main
464
347
  def self.run!
465
- # first use of Conf initializes it
466
- WCC.logger = Logger.new(STDOUT)
467
-
468
348
  # make sure logger is correctly configured
349
+ WCC.logger = Logger.new(STDOUT)
350
+ # first use of Conf initializes it
469
351
  Conf.instance
470
352
 
471
- # create cache dir for hash and diff files
472
- Dir.mkdir(Conf[:cache_dir]) unless File.directory?(Conf[:cache_dir])
473
-
474
- if(Conf[:clean])
475
- WCC.logger.warn "Removing hash and diff files..."
476
- Dir.foreach(Conf[:cache_dir]) do |f|
477
- File.delete(Conf.file(f)) if f =~ /^.*\.(md5|site)$/
478
- end
479
- cache_file = Conf.file('cache.yml')
480
- WCC.logger.warn "Removing timestamp cache..."
481
- File.delete(cache_file) if File.exists?(cache_file)
482
- Prog.exit 1
483
- end
484
-
485
- # read filter.d
486
- Dir[File.join(Conf[:filter_dir], '*.rb')].each { |file| require file }
353
+ create_cache_dir
354
+ clean_cache_dir if Conf[:clean]
355
+ load_filters
356
+ load_timestamps
487
357
 
488
- # timestamps
489
- cache_file = Conf.file('cache.yml')
490
- if File.exists?(cache_file)
491
- WCC.logger.debug "Load timestamps from '#{cache_file}'"
492
-
493
- # may be *false* if file is empty
494
- yaml = YAML.load_file(cache_file)
495
-
496
- if not yaml
497
- WCC.logger.info "No timestamps loaded"
498
- else
499
- @@timestamps = yaml['timestamps']
500
- end
501
- else
502
- @@timestamps = {}
503
- end
358
+ # stats
359
+ @@stats = {
360
+ 'nruns' => 1,
361
+ 'nsites' => 0, 'nnotifications' => 0, 'nerrors' => 0,
362
+ 'nlines' => 0, 'nhunks' => 0
363
+ }
504
364
 
505
365
  Conf.sites.each do |site|
506
366
  ts_old = get_timestamp(site)
@@ -510,24 +370,22 @@ module WCC
510
370
  WCC.logger.info "Skipping check for #{site.uri.host.to_s} due to check #{ts_diff} minute#{ts_diff == 1 ? '' : 's'} ago."
511
371
  next
512
372
  end
513
- if checkForUpdate(site)
373
+ case checkForUpdate(site)
374
+ when :update
514
375
  WCC.logger.warn "#{site.uri.host.to_s} has an update!"
515
- else
376
+ when :noupdate
516
377
  WCC.logger.info "#{site.uri.host.to_s} is unchanged"
378
+ when :error
379
+ @@stats['nerrors'] += 1
517
380
  end
518
381
  update_timestamp(site, ts_new)
519
382
  end
520
383
 
521
- # save timestamps
522
- File.open(cache_file, 'w+') do |f| YAML.dump({"timestamps" => @@timestamps}, f) end
523
-
524
- # shut down notificators
525
- Notificators.mappings.each do |name,klass|
526
- WCC.logger.debug "Shut down #{klass}"
527
- klass.shut_down
528
- end
384
+ save_timestamps
385
+ update_stats if Conf[:stats]
386
+ shut_down_notificators
529
387
  end
530
-
388
+
531
389
  # Attempts to read the named template file from template.d
532
390
  # and converts it into ERB.
533
391
  #
@@ -543,7 +401,7 @@ module WCC
543
401
  end
544
402
  nil
545
403
  end
546
-
404
+
547
405
  # Attempts to write the given raw content to the named template file
548
406
  # in template.d. This should be used to create initial template files on demand
549
407
  # and will work only when file does not already exist.
@@ -574,5 +432,194 @@ module WCC
574
432
  def self.update_timestamp(site, t)
575
433
  @@timestamps[site.uri.to_s] = t
576
434
  end
435
+
436
+ def self.create_cache_dir
437
+ Dir.mkdir(Conf[:cache_dir]) unless File.directory?(Conf[:cache_dir])
438
+ end
439
+
440
+ def self.clean_cache_dir
441
+ WCC.logger.warn "Removing hash and diff files..."
442
+ Dir.foreach(Conf[:cache_dir]) do |f|
443
+ File.delete(Conf.file(f)) if f =~ /^.*\.(md5|site)$/
444
+ end
445
+ # special files
446
+ cache_file = Conf.file('cache.yml')
447
+ WCC.logger.warn "Removing timestamp cache..."
448
+ File.delete(cache_file) if File.exists?(cache_file)
449
+ stats_file = Conf.file('stats.yml')
450
+ WCC.logger.warn "Removing stats file..."
451
+ File.delete(stats_file) if File.exists?(stats_file)
452
+ Prog.exit 1
453
+ end
454
+
455
+ def self.load_filters
456
+ Dir[File.join(Conf[:filter_dir], '*.rb')].each do |file|
457
+ require file
458
+ end
459
+ end
460
+
461
+ def self.load_timestamps
462
+ cache_file = Conf.file('cache.yml')
463
+ @@timestamps = {}
464
+ if File.exists?(cache_file)
465
+ WCC.logger.debug "Load timestamps from '#{cache_file}'"
466
+ # may be *false* if file is empty
467
+ yaml = YAML.load_file(cache_file)
468
+ if not yaml
469
+ WCC.logger.warn "No timestamps loaded"
470
+ else
471
+ @@timestamps = yaml['timestamps']
472
+ end
473
+ end
474
+ end
475
+
476
+ def self.save_timestamps
477
+ cache_file = Conf.file('cache.yml')
478
+ File.open(cache_file, 'w+') do |f| YAML.dump({"timestamps" => @@timestamps}, f) end
479
+ end
480
+
481
+ def self.update_stats
482
+ stats_file = Conf.file('stats.yml')
483
+ if File.exists?(stats_file)
484
+ WCC.logger.debug "Load stats from '#{stats_file}'"
485
+ yaml = YAML.load_file(stats_file)
486
+ if not yaml
487
+ WCC.logger.warn "No stats loaded"
488
+ else
489
+ # merge stats infos
490
+ @@stats.each do |k,v| @@stats[k] += yaml['stats'][k] end
491
+ end
492
+ end
493
+ File.open(stats_file, 'w+') do |f| YAML.dump({"stats" => @@stats}, f) end
494
+ end
495
+
496
+ def self.shut_down_notificators
497
+ Notificators.mappings.each do |name,klass|
498
+ WCC.logger.debug "Shut down #{klass}"
499
+ klass.shut_down
500
+ end
501
+ end
502
+
503
+ def self.checkForUpdate(site)
504
+ WCC.logger.info "Requesting '#{site.uri.to_s}'"
505
+ begin
506
+ res = site.fetch
507
+ rescue Timeout::Error => ex
508
+ return :noupdate # don't claim on this
509
+ rescue => ex
510
+ WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
511
+ return :error
512
+ end
513
+ return :error if handle_http_errors(res, site)
514
+
515
+ new_content = get_utf8_body(res, site)
516
+ return :error if new_content.nil?
517
+
518
+ # strip html
519
+ new_content = new_content.strip_html if site.strip_html?
520
+
521
+ new_hash = Digest::MD5.hexdigest(new_content)
522
+ WCC.logger.debug "Compare hashes\n old: #{site.hash.to_s}\n new: #{new_hash.to_s}"
523
+ return :noupdate if new_hash == site.hash
524
+
525
+ # do not try diff or anything if site was never checked before
526
+ if site.new?
527
+ site.hash, site.content = new_hash, new_content
528
+ diff = nil # no diff possible
529
+ else
530
+ # save old site to tmp file
531
+ old_site_file = Tempfile.open("wcc-#{site.id}-")
532
+ old_site_file.write(site.content)
533
+ old_site_file.close
534
+ old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + '.md5')).strftime(DIFF_TIME_FMT)
535
+ new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)
536
+ # save new site
537
+ site.hash, site.content = new_hash, new_content
538
+ # diff between OLD and NEW
539
+ diff = %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" #{old_site_file.path} #{Conf.file(site.id + '.site')}]
540
+ end
541
+
542
+ # construct the data made available to filters and templates
543
+ data = OpenStruct.new(:site => site, :tag => Conf[:tag],
544
+ :diff => diff.nil? ? nil : WCC::Differ.new(diff))
545
+
546
+ @@stats['nsites'] += 1
547
+ if not data.diff.nil?
548
+ @@stats['nlines'] += data.diff.nlinesc
549
+ @@stats['nhunks'] += data.diff.nhunks
550
+ end
551
+
552
+ # HACK: there *was* an update but no notification is required
553
+ return :noupdate if not Filters.accept(data, site.filters)
554
+
555
+ site.notify.each do |name|
556
+ rec = Conf.recipients[name]
557
+ if rec.nil?
558
+ WCC.logger.error "Could not notify recipient #{name} - not found!"
559
+ else
560
+ @@stats['nnotifications'] += 1
561
+ rec.each { |way| way.notify!(data) }
562
+ end
563
+ end
564
+
565
+ :update
566
+ end
567
+
568
+ def self.handle_http_errors(res, site)
569
+ return false if res.kind_of?(Net::HTTPOK)
570
+ if res.kind_of?(Net::HTTPMovedPermanently)
571
+ loc = res['Location']
572
+ if loc.nil?
573
+ WCC.logger.error "Site #{site.uri.to_s} moved permanently, skipping it - no new location given."
574
+ else
575
+ WCC.logger.error "Site #{site.uri.to_s} moved permanently to '#{loc}', skipping it - please update your conf.yml adequately!"
576
+ end
577
+ return true
578
+ elsif res.kind_of?(Net::HTTPSeeOther) or res.kind_of?(Net::HTTPTemporaryRedirect)
579
+ loc = URI.parse(res['Location'])
580
+ WCC.logger.warn "Redirect: requesting '#{loc.to_s}'"
581
+ res = site.fetch_redirect(loc)
582
+ if not res.kind_of?(Net::HTTPOK)
583
+ WCC.logger.error "Redirected site #{loc.to_s} returned #{res.code} code, skipping it."
584
+ WCC.logger.error "Headers: #{res.to_hash.inspect}"
585
+ return true
586
+ end
587
+ elsif res.kind_of?(Net::HTTPUnauthorized)
588
+ WCC.logger.error "Site #{site.uri.to_s} demands authentication for '#{res['www-authenticate']}', skipping it - consider using 'auth:' option in your conf.yml."
589
+ return true
590
+ elsif res.kind_of?(Net::HTTPNotFound)
591
+ WCC.logger.error "Site #{site.uri.to_s} not found (404), skipping it."
592
+ return true
593
+ elsif res.kind_of?(Net::HTTPForbidden)
594
+ WCC.logger.error "Site #{site.uri.to_s} forbids access (403), skipping it."
595
+ return true
596
+ elsif res.kind_of?(Net::HTTPInternalServerError)
597
+ WCC.logger.error "Site #{site.uri.to_s} has internal errors (500), skipping it."
598
+ return true
599
+ elsif res.kind_of?(Net::HTTPServiceUnavailable)
600
+ #retry_after = res['Retry-After']
601
+ WCC.logger.warn "Site #{site.uri.to_s} currently not available (503), skipping it."
602
+ return true
603
+ else
604
+ WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
605
+ WCC.logger.error "Headers: #{res.to_hash.inspect}"
606
+ return true
607
+ end
608
+ end
609
+
610
+ def self.get_utf8_body(res, site)
611
+ # detect encoding from http header, meta element, default utf-8
612
+ # do not use utf-8 regex because it will fail on non utf-8 pages
613
+ encoding = (res['content-type'].to_s.match(/;\s*charset=([A-Za-z0-9-]*)/i).to_a[1] ||
614
+ res.body.match(/<meta.*charset=([a-zA-Z0-9-]*).*/i).to_a[1]).to_s.downcase || 'utf-8'
615
+ WCC.logger.info "Encoding is '#{encoding}'"
616
+ # convert to utf-8
617
+ begin
618
+ return Iconv.conv('utf-8', encoding, res.body)
619
+ rescue => ex
620
+ WCC.logger.error "Cannot convert site #{site.uri.to_s} from '#{encoding}': #{ex.to_s}"
621
+ end
622
+ nil
623
+ end
577
624
  end
578
625
  end
@@ -1,4 +1,4 @@
1
1
 
2
2
  module WCC
3
- VERSION = "2.2.0"
3
+ VERSION = "2.3.0"
4
4
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wcc
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 3
5
5
  prerelease:
6
6
  segments:
7
7
  - 2
8
- - 2
8
+ - 3
9
9
  - 0
10
- version: 2.2.0
10
+ version: 2.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Christian Nicolai
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-11-24 00:00:00 Z
18
+ date: 2012-06-30 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: htmlentities