wcc 2.2.0 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4):
  1. data/assets/conf.yml +1 -0
  2. data/lib/wcc.rb +214 -167
  3. data/lib/wcc/version.rb +1 -1
  4. metadata +4 -4
data/assets/conf.yml CHANGED
@@ -6,6 +6,7 @@ conf:
6
6
  # tag: wcc
7
7
  # filterd: ./filter.d
8
8
  # templated: ./template.d
9
+ # stats: yes
9
10
  # # default config assuming local mail server
10
11
  # email:
11
12
  # smtp:
data/lib/wcc.rb CHANGED
@@ -100,6 +100,7 @@ module WCC
100
100
  :simulate => false,
101
101
  :clean => false,
102
102
  :nomails => false,
103
+ :stats => false,
103
104
  # when you want to use ./tmp it must be writeable
104
105
  :cache_dir => '/var/tmp/wcc',
105
106
  :tag => 'wcc',
@@ -175,6 +176,8 @@ module WCC
175
176
  @options[:filter_dir] ||= val
176
177
  when 'templated'
177
178
  @options[:template_dir] ||= val
179
+ when 'stats'
180
+ @options[:stats] ||= val
178
181
  else
179
182
  if not Notificators.mappings.include?(key)
180
183
  plugin_name = "wcc-#{key}-notificator"
@@ -341,166 +344,23 @@ module WCC
341
344
  end
342
345
 
343
346
  class Prog
344
- def self.checkForUpdate(site)
345
- WCC.logger.info "Requesting '#{site.uri.to_s}'"
346
- begin
347
- res = site.fetch
348
- rescue Timeout::Error => ex
349
- # don't claim on this
350
- return false
351
- rescue => ex
352
- WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
353
- return false
354
- end
355
- if res.kind_of?(Net::HTTPOK)
356
- # be happy!
357
- elsif res.kind_of?(Net::HTTPMovedPermanently)
358
- loc = res['Location']
359
- if loc.nil?
360
- WCC.logger.error "Site #{site.uri.to_s} moved permanently, skippong it - no new location given."
361
- else
362
- WCC.logger.error "Site #{site.uri.to_s} moved permanently to '#{loc}', skipping it - please update your conf.yml adequately!"
363
- end
364
- return false
365
- elsif res.kind_of?(Net::HTTPSeeOther) or res.kind_of?(Net::HTTPTemporaryRedirect)
366
- loc = URI.parse(res['Location'])
367
- WCC.logger.warn "Redirect: requesting '#{loc.to_s}'"
368
- res = site.fetch_redirect(loc)
369
- if not res.kind_of?(Net::HTTPOK)
370
- WCC.logger.error "Redirected site #{loc.to_s} returned #{res.code} code, skipping it."
371
- WCC.logger.error "Headers: #{res.to_hash.inspect}"
372
- return false
373
- end
374
- elsif res.kind_of?(Net::HTTPUnauthorized)
375
- WCC.logger.error "Site #{site.uri.to_s} demands authentication for '#{res['www-authenticate']}', skipping it - consider using 'auth:' option in your conf.yml."
376
- return false
377
- elsif res.kind_of?(Net::HTTPNotFound)
378
- WCC.logger.error "Site #{site.uri.to_s} not found, skipping it."
379
- return false
380
- elsif res.kind_of?(Net::HTTPForbidden)
381
- WCC.logger.error "Site #{site.uri.to_s} forbids access, skipping it."
382
- return false
383
- elsif res.kind_of?(Net::HTTPInternalServerError)
384
- WCC.logger.error "Site #{site.uri.to_s} has internal errors, skipping it."
385
- return false
386
- elsif res.kind_of?(Net::HTTPServiceUnavailable)
387
- #retry_after = res['Retry-After']
388
- WCC.logger.warn "Site #{site.uri.to_s} currently not available, skipping it."
389
- return false
390
- else
391
- WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
392
- WCC.logger.error "Headers: #{res.to_hash.inspect}"
393
- return false
394
- end
395
-
396
- new_content = res.body
397
-
398
- # detect encoding from http header, meta element, default utf-8
399
- # do not use utf-8 regex because it will fail on non utf-8 pages
400
- encoding = (res['content-type'].to_s.match(/;\s*charset=([A-Za-z0-9-]*)/i).to_a[1] ||
401
- new_content.match(/<meta.*charset=([a-zA-Z0-9-]*).*/i).to_a[1]).to_s.downcase || 'utf-8'
402
-
403
- WCC.logger.info "Encoding is '#{encoding}'"
404
-
405
- # convert to utf-8
406
- begin
407
- new_content = Iconv.conv('utf-8', encoding, new_content)
408
- rescue => ex
409
- WCC.logger.error "Cannot convert site from '#{encoding}': #{ex.to_s}"
410
- return false
411
- end
412
-
413
- # strip html
414
- new_content = new_content.strip_html if site.strip_html?
415
- new_hash = Digest::MD5.hexdigest(new_content)
416
-
417
- WCC.logger.debug "Compare hashes\n old: #{site.hash.to_s}\n new: #{new_hash.to_s}"
418
- return false if new_hash == site.hash
419
-
420
- # do not try diff or anything if site was never checked before
421
- if site.new?
422
- site.hash, site.content = new_hash, new_content
423
-
424
- # signal that no diff was posible
425
- diff = nil
426
- else
427
- # save old site to tmp file
428
- old_site_file = Tempfile.new("wcc-#{site.id}-")
429
- old_site_file.write(site.content)
430
- old_site_file.close
431
-
432
- # calculate labels before updating
433
- old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + ".md5")).strftime(DIFF_TIME_FMT)
434
- new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)
435
-
436
- site.hash, site.content = new_hash, new_content
437
-
438
- # diff between OLD and NEW
439
- diff = %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" #{old_site_file.path} #{Conf.file(site.id + '.site')}]
440
- end
441
-
442
- # construct the data made available to filters and templates
443
- data = OpenStruct.new
444
- data.site = site
445
- data.diff = diff.nil? ? nil : WCC::Differ.new(diff)
446
- data.tag = Conf[:tag]
447
-
448
- # HACK: there *was* an update but no notification is required
449
- return false if not Filters.accept(data, site.filters)
450
-
451
- site.notify.each do |name|
452
- rec = Conf.recipients[name]
453
- if rec.nil?
454
- WCC.logger.error "Could not notify recipient #{name} - not found!"
455
- else
456
- rec.each { |way| way.notify!(data) }
457
- end
458
- end
459
-
460
- true
461
- end
462
-
463
- # main
464
347
  def self.run!
465
- # first use of Conf initializes it
466
- WCC.logger = Logger.new(STDOUT)
467
-
468
348
  # make sure logger is correctly configured
349
+ WCC.logger = Logger.new(STDOUT)
350
+ # first use of Conf initializes it
469
351
  Conf.instance
470
352
 
471
- # create cache dir for hash and diff files
472
- Dir.mkdir(Conf[:cache_dir]) unless File.directory?(Conf[:cache_dir])
473
-
474
- if(Conf[:clean])
475
- WCC.logger.warn "Removing hash and diff files..."
476
- Dir.foreach(Conf[:cache_dir]) do |f|
477
- File.delete(Conf.file(f)) if f =~ /^.*\.(md5|site)$/
478
- end
479
- cache_file = Conf.file('cache.yml')
480
- WCC.logger.warn "Removing timestamp cache..."
481
- File.delete(cache_file) if File.exists?(cache_file)
482
- Prog.exit 1
483
- end
484
-
485
- # read filter.d
486
- Dir[File.join(Conf[:filter_dir], '*.rb')].each { |file| require file }
353
+ create_cache_dir
354
+ clean_cache_dir if Conf[:clean]
355
+ load_filters
356
+ load_timestamps
487
357
 
488
- # timestamps
489
- cache_file = Conf.file('cache.yml')
490
- if File.exists?(cache_file)
491
- WCC.logger.debug "Load timestamps from '#{cache_file}'"
492
-
493
- # may be *false* if file is empty
494
- yaml = YAML.load_file(cache_file)
495
-
496
- if not yaml
497
- WCC.logger.info "No timestamps loaded"
498
- else
499
- @@timestamps = yaml['timestamps']
500
- end
501
- else
502
- @@timestamps = {}
503
- end
358
+ # stats
359
+ @@stats = {
360
+ 'nruns' => 1,
361
+ 'nsites' => 0, 'nnotifications' => 0, 'nerrors' => 0,
362
+ 'nlines' => 0, 'nhunks' => 0
363
+ }
504
364
 
505
365
  Conf.sites.each do |site|
506
366
  ts_old = get_timestamp(site)
@@ -510,24 +370,22 @@ module WCC
510
370
  WCC.logger.info "Skipping check for #{site.uri.host.to_s} due to check #{ts_diff} minute#{ts_diff == 1 ? '' : 's'} ago."
511
371
  next
512
372
  end
513
- if checkForUpdate(site)
373
+ case checkForUpdate(site)
374
+ when :update
514
375
  WCC.logger.warn "#{site.uri.host.to_s} has an update!"
515
- else
376
+ when :noupdate
516
377
  WCC.logger.info "#{site.uri.host.to_s} is unchanged"
378
+ when :error
379
+ @@stats['nerrors'] += 1
517
380
  end
518
381
  update_timestamp(site, ts_new)
519
382
  end
520
383
 
521
- # save timestamps
522
- File.open(cache_file, 'w+') do |f| YAML.dump({"timestamps" => @@timestamps}, f) end
523
-
524
- # shut down notificators
525
- Notificators.mappings.each do |name,klass|
526
- WCC.logger.debug "Shut down #{klass}"
527
- klass.shut_down
528
- end
384
+ save_timestamps
385
+ update_stats if Conf[:stats]
386
+ shut_down_notificators
529
387
  end
530
-
388
+
531
389
  # Attempts to read the named template file from template.d
532
390
  # and converts it into ERB.
533
391
  #
@@ -543,7 +401,7 @@ module WCC
543
401
  end
544
402
  nil
545
403
  end
546
-
404
+
547
405
  # Attempts to write the given raw content to the named template file
548
406
  # in template.d. This should be used to create initial template files on demand
549
407
  # and will work only when file does not already exist.
@@ -574,5 +432,194 @@ module WCC
574
432
  def self.update_timestamp(site, t)
575
433
  @@timestamps[site.uri.to_s] = t
576
434
  end
435
+
436
+ def self.create_cache_dir
437
+ Dir.mkdir(Conf[:cache_dir]) unless File.directory?(Conf[:cache_dir])
438
+ end
439
+
440
+ def self.clean_cache_dir
441
+ WCC.logger.warn "Removing hash and diff files..."
442
+ Dir.foreach(Conf[:cache_dir]) do |f|
443
+ File.delete(Conf.file(f)) if f =~ /^.*\.(md5|site)$/
444
+ end
445
+ # special files
446
+ cache_file = Conf.file('cache.yml')
447
+ WCC.logger.warn "Removing timestamp cache..."
448
+ File.delete(cache_file) if File.exists?(cache_file)
449
+ stats_file = Conf.file('stats.yml')
450
+ WCC.logger.warn "Removing stats file..."
451
+ File.delete(stats_file) if File.exists?(stats_file)
452
+ Prog.exit 1
453
+ end
454
+
455
+ def self.load_filters
456
+ Dir[File.join(Conf[:filter_dir], '*.rb')].each do |file|
457
+ require file
458
+ end
459
+ end
460
+
461
+ def self.load_timestamps
462
+ cache_file = Conf.file('cache.yml')
463
+ @@timestamps = {}
464
+ if File.exists?(cache_file)
465
+ WCC.logger.debug "Load timestamps from '#{cache_file}'"
466
+ # may be *false* if file is empty
467
+ yaml = YAML.load_file(cache_file)
468
+ if not yaml
469
+ WCC.logger.warn "No timestamps loaded"
470
+ else
471
+ @@timestamps = yaml['timestamps']
472
+ end
473
+ end
474
+ end
475
+
476
+ def self.save_timestamps
477
+ cache_file = Conf.file('cache.yml')
478
+ File.open(cache_file, 'w+') do |f| YAML.dump({"timestamps" => @@timestamps}, f) end
479
+ end
480
+
481
+ def self.update_stats
482
+ stats_file = Conf.file('stats.yml')
483
+ if File.exists?(stats_file)
484
+ WCC.logger.debug "Load stats from '#{stats_file}'"
485
+ yaml = YAML.load_file(stats_file)
486
+ if not yaml
487
+ WCC.logger.warn "No stats loaded"
488
+ else
489
+ # merge stats infos
490
+ @@stats.each do |k,v| @@stats[k] += yaml['stats'][k] end
491
+ end
492
+ end
493
+ File.open(stats_file, 'w+') do |f| YAML.dump({"stats" => @@stats}, f) end
494
+ end
495
+
496
+ def self.shut_down_notificators
497
+ Notificators.mappings.each do |name,klass|
498
+ WCC.logger.debug "Shut down #{klass}"
499
+ klass.shut_down
500
+ end
501
+ end
502
+
503
+ def self.checkForUpdate(site)
504
+ WCC.logger.info "Requesting '#{site.uri.to_s}'"
505
+ begin
506
+ res = site.fetch
507
+ rescue Timeout::Error => ex
508
+ return :noupdate # don't claim on this
509
+ rescue => ex
510
+ WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
511
+ return :error
512
+ end
513
+ return :error if handle_http_errors(res, site)
514
+
515
+ new_content = get_utf8_body(res, site)
516
+ return :error if new_content.nil?
517
+
518
+ # strip html
519
+ new_content = new_content.strip_html if site.strip_html?
520
+
521
+ new_hash = Digest::MD5.hexdigest(new_content)
522
+ WCC.logger.debug "Compare hashes\n old: #{site.hash.to_s}\n new: #{new_hash.to_s}"
523
+ return :noupdate if new_hash == site.hash
524
+
525
+ # do not try diff or anything if site was never checked before
526
+ if site.new?
527
+ site.hash, site.content = new_hash, new_content
528
+ diff = nil # no diff possible
529
+ else
530
+ # save old site to tmp file
531
+ old_site_file = Tempfile.open("wcc-#{site.id}-")
532
+ old_site_file.write(site.content)
533
+ old_site_file.close
534
+ old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + '.md5')).strftime(DIFF_TIME_FMT)
535
+ new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)
536
+ # save new site
537
+ site.hash, site.content = new_hash, new_content
538
+ # diff between OLD and NEW
539
+ diff = %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" #{old_site_file.path} #{Conf.file(site.id + '.site')}]
540
+ end
541
+
542
+ # construct the data made available to filters and templates
543
+ data = OpenStruct.new(:site => site, :tag => Conf[:tag],
544
+ :diff => diff.nil? ? nil : WCC::Differ.new(diff))
545
+
546
+ @@stats['nsites'] += 1
547
+ if not data.diff.nil?
548
+ @@stats['nlines'] += data.diff.nlinesc
549
+ @@stats['nhunks'] += data.diff.nhunks
550
+ end
551
+
552
+ # HACK: there *was* an update but no notification is required
553
+ return :noupdate if not Filters.accept(data, site.filters)
554
+
555
+ site.notify.each do |name|
556
+ rec = Conf.recipients[name]
557
+ if rec.nil?
558
+ WCC.logger.error "Could not notify recipient #{name} - not found!"
559
+ else
560
+ @@stats['nnotifications'] += 1
561
+ rec.each { |way| way.notify!(data) }
562
+ end
563
+ end
564
+
565
+ :update
566
+ end
567
+
568
+ def self.handle_http_errors(res, site)
569
+ return false if res.kind_of?(Net::HTTPOK)
570
+ if res.kind_of?(Net::HTTPMovedPermanently)
571
+ loc = res['Location']
572
+ if loc.nil?
573
+ WCC.logger.error "Site #{site.uri.to_s} moved permanently, skipping it - no new location given."
574
+ else
575
+ WCC.logger.error "Site #{site.uri.to_s} moved permanently to '#{loc}', skipping it - please update your conf.yml adequately!"
576
+ end
577
+ return true
578
+ elsif res.kind_of?(Net::HTTPSeeOther) or res.kind_of?(Net::HTTPTemporaryRedirect)
579
+ loc = URI.parse(res['Location'])
580
+ WCC.logger.warn "Redirect: requesting '#{loc.to_s}'"
581
+ res = site.fetch_redirect(loc)
582
+ if not res.kind_of?(Net::HTTPOK)
583
+ WCC.logger.error "Redirected site #{loc.to_s} returned #{res.code} code, skipping it."
584
+ WCC.logger.error "Headers: #{res.to_hash.inspect}"
585
+ return true
586
+ end
587
+ elsif res.kind_of?(Net::HTTPUnauthorized)
588
+ WCC.logger.error "Site #{site.uri.to_s} demands authentication for '#{res['www-authenticate']}', skipping it - consider using 'auth:' option in your conf.yml."
589
+ return true
590
+ elsif res.kind_of?(Net::HTTPNotFound)
591
+ WCC.logger.error "Site #{site.uri.to_s} not found (404), skipping it."
592
+ return true
593
+ elsif res.kind_of?(Net::HTTPForbidden)
594
+ WCC.logger.error "Site #{site.uri.to_s} forbids access (403), skipping it."
595
+ return true
596
+ elsif res.kind_of?(Net::HTTPInternalServerError)
597
+ WCC.logger.error "Site #{site.uri.to_s} has internal errors (500), skipping it."
598
+ return true
599
+ elsif res.kind_of?(Net::HTTPServiceUnavailable)
600
+ #retry_after = res['Retry-After']
601
+ WCC.logger.warn "Site #{site.uri.to_s} currently not available (503), skipping it."
602
+ return true
603
+ else
604
+ WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
605
+ WCC.logger.error "Headers: #{res.to_hash.inspect}"
606
+ return true
607
+ end
608
+ end
609
+
610
+ def self.get_utf8_body(res, site)
611
+ # detect encoding from http header, meta element, default utf-8
612
+ # do not use utf-8 regex because it will fail on non utf-8 pages
613
+ encoding = (res['content-type'].to_s.match(/;\s*charset=([A-Za-z0-9-]*)/i).to_a[1] ||
614
+ res.body.match(/<meta.*charset=([a-zA-Z0-9-]*).*/i).to_a[1]).to_s.downcase || 'utf-8'
615
+ WCC.logger.info "Encoding is '#{encoding}'"
616
+ # convert to utf-8
617
+ begin
618
+ return Iconv.conv('utf-8', encoding, res.body)
619
+ rescue => ex
620
+ WCC.logger.error "Cannot convert site #{site.uri.to_s} from '#{encoding}': #{ex.to_s}"
621
+ end
622
+ nil
623
+ end
577
624
  end
578
625
  end
data/lib/wcc/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
 
2
2
  module WCC
3
- VERSION = "2.2.0"
3
+ VERSION = "2.3.0"
4
4
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wcc
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 3
5
5
  prerelease:
6
6
  segments:
7
7
  - 2
8
- - 2
8
+ - 3
9
9
  - 0
10
- version: 2.2.0
10
+ version: 2.3.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Christian Nicolai
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-11-24 00:00:00 Z
18
+ date: 2012-06-30 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: htmlentities