wcc 2.2.0 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/assets/conf.yml +1 -0
- data/lib/wcc.rb +214 -167
- data/lib/wcc/version.rb +1 -1
- metadata +4 -4
data/assets/conf.yml
CHANGED
data/lib/wcc.rb
CHANGED
@@ -100,6 +100,7 @@ module WCC
|
|
100
100
|
:simulate => false,
|
101
101
|
:clean => false,
|
102
102
|
:nomails => false,
|
103
|
+
:stats => false,
|
103
104
|
# when you want to use ./tmp it must be writeable
|
104
105
|
:cache_dir => '/var/tmp/wcc',
|
105
106
|
:tag => 'wcc',
|
@@ -175,6 +176,8 @@ module WCC
|
|
175
176
|
@options[:filter_dir] ||= val
|
176
177
|
when 'templated'
|
177
178
|
@options[:template_dir] ||= val
|
179
|
+
when 'stats'
|
180
|
+
@options[:stats] ||= val
|
178
181
|
else
|
179
182
|
if not Notificators.mappings.include?(key)
|
180
183
|
plugin_name = "wcc-#{key}-notificator"
|
@@ -341,166 +344,23 @@ module WCC
|
|
341
344
|
end
|
342
345
|
|
343
346
|
class Prog
|
344
|
-
def self.checkForUpdate(site)
|
345
|
-
WCC.logger.info "Requesting '#{site.uri.to_s}'"
|
346
|
-
begin
|
347
|
-
res = site.fetch
|
348
|
-
rescue Timeout::Error => ex
|
349
|
-
# don't claim on this
|
350
|
-
return false
|
351
|
-
rescue => ex
|
352
|
-
WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
|
353
|
-
return false
|
354
|
-
end
|
355
|
-
if res.kind_of?(Net::HTTPOK)
|
356
|
-
# be happy!
|
357
|
-
elsif res.kind_of?(Net::HTTPMovedPermanently)
|
358
|
-
loc = res['Location']
|
359
|
-
if loc.nil?
|
360
|
-
WCC.logger.error "Site #{site.uri.to_s} moved permanently, skippong it - no new location given."
|
361
|
-
else
|
362
|
-
WCC.logger.error "Site #{site.uri.to_s} moved permanently to '#{loc}', skipping it - please update your conf.yml adequately!"
|
363
|
-
end
|
364
|
-
return false
|
365
|
-
elsif res.kind_of?(Net::HTTPSeeOther) or res.kind_of?(Net::HTTPTemporaryRedirect)
|
366
|
-
loc = URI.parse(res['Location'])
|
367
|
-
WCC.logger.warn "Redirect: requesting '#{loc.to_s}'"
|
368
|
-
res = site.fetch_redirect(loc)
|
369
|
-
if not res.kind_of?(Net::HTTPOK)
|
370
|
-
WCC.logger.error "Redirected site #{loc.to_s} returned #{res.code} code, skipping it."
|
371
|
-
WCC.logger.error "Headers: #{res.to_hash.inspect}"
|
372
|
-
return false
|
373
|
-
end
|
374
|
-
elsif res.kind_of?(Net::HTTPUnauthorized)
|
375
|
-
WCC.logger.error "Site #{site.uri.to_s} demands authentication for '#{res['www-authenticate']}', skipping it - consider using 'auth:' option in your conf.yml."
|
376
|
-
return false
|
377
|
-
elsif res.kind_of?(Net::HTTPNotFound)
|
378
|
-
WCC.logger.error "Site #{site.uri.to_s} not found, skipping it."
|
379
|
-
return false
|
380
|
-
elsif res.kind_of?(Net::HTTPForbidden)
|
381
|
-
WCC.logger.error "Site #{site.uri.to_s} forbids access, skipping it."
|
382
|
-
return false
|
383
|
-
elsif res.kind_of?(Net::HTTPInternalServerError)
|
384
|
-
WCC.logger.error "Site #{site.uri.to_s} has internal errors, skipping it."
|
385
|
-
return false
|
386
|
-
elsif res.kind_of?(Net::HTTPServiceUnavailable)
|
387
|
-
#retry_after = res['Retry-After']
|
388
|
-
WCC.logger.warn "Site #{site.uri.to_s} currently not available, skipping it."
|
389
|
-
return false
|
390
|
-
else
|
391
|
-
WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
|
392
|
-
WCC.logger.error "Headers: #{res.to_hash.inspect}"
|
393
|
-
return false
|
394
|
-
end
|
395
|
-
|
396
|
-
new_content = res.body
|
397
|
-
|
398
|
-
# detect encoding from http header, meta element, default utf-8
|
399
|
-
# do not use utf-8 regex because it will fail on non utf-8 pages
|
400
|
-
encoding = (res['content-type'].to_s.match(/;\s*charset=([A-Za-z0-9-]*)/i).to_a[1] ||
|
401
|
-
new_content.match(/<meta.*charset=([a-zA-Z0-9-]*).*/i).to_a[1]).to_s.downcase || 'utf-8'
|
402
|
-
|
403
|
-
WCC.logger.info "Encoding is '#{encoding}'"
|
404
|
-
|
405
|
-
# convert to utf-8
|
406
|
-
begin
|
407
|
-
new_content = Iconv.conv('utf-8', encoding, new_content)
|
408
|
-
rescue => ex
|
409
|
-
WCC.logger.error "Cannot convert site from '#{encoding}': #{ex.to_s}"
|
410
|
-
return false
|
411
|
-
end
|
412
|
-
|
413
|
-
# strip html
|
414
|
-
new_content = new_content.strip_html if site.strip_html?
|
415
|
-
new_hash = Digest::MD5.hexdigest(new_content)
|
416
|
-
|
417
|
-
WCC.logger.debug "Compare hashes\n old: #{site.hash.to_s}\n new: #{new_hash.to_s}"
|
418
|
-
return false if new_hash == site.hash
|
419
|
-
|
420
|
-
# do not try diff or anything if site was never checked before
|
421
|
-
if site.new?
|
422
|
-
site.hash, site.content = new_hash, new_content
|
423
|
-
|
424
|
-
# signal that no diff was posible
|
425
|
-
diff = nil
|
426
|
-
else
|
427
|
-
# save old site to tmp file
|
428
|
-
old_site_file = Tempfile.new("wcc-#{site.id}-")
|
429
|
-
old_site_file.write(site.content)
|
430
|
-
old_site_file.close
|
431
|
-
|
432
|
-
# calculate labels before updating
|
433
|
-
old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + ".md5")).strftime(DIFF_TIME_FMT)
|
434
|
-
new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)
|
435
|
-
|
436
|
-
site.hash, site.content = new_hash, new_content
|
437
|
-
|
438
|
-
# diff between OLD and NEW
|
439
|
-
diff = %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" #{old_site_file.path} #{Conf.file(site.id + '.site')}]
|
440
|
-
end
|
441
|
-
|
442
|
-
# construct the data made available to filters and templates
|
443
|
-
data = OpenStruct.new
|
444
|
-
data.site = site
|
445
|
-
data.diff = diff.nil? ? nil : WCC::Differ.new(diff)
|
446
|
-
data.tag = Conf[:tag]
|
447
|
-
|
448
|
-
# HACK: there *was* an update but no notification is required
|
449
|
-
return false if not Filters.accept(data, site.filters)
|
450
|
-
|
451
|
-
site.notify.each do |name|
|
452
|
-
rec = Conf.recipients[name]
|
453
|
-
if rec.nil?
|
454
|
-
WCC.logger.error "Could not notify recipient #{name} - not found!"
|
455
|
-
else
|
456
|
-
rec.each { |way| way.notify!(data) }
|
457
|
-
end
|
458
|
-
end
|
459
|
-
|
460
|
-
true
|
461
|
-
end
|
462
|
-
|
463
|
-
# main
|
464
347
|
def self.run!
|
465
|
-
# first use of Conf initializes it
|
466
|
-
WCC.logger = Logger.new(STDOUT)
|
467
|
-
|
468
348
|
# make sure logger is correctly configured
|
349
|
+
WCC.logger = Logger.new(STDOUT)
|
350
|
+
# first use of Conf initializes it
|
469
351
|
Conf.instance
|
470
352
|
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
WCC.logger.warn "Removing hash and diff files..."
|
476
|
-
Dir.foreach(Conf[:cache_dir]) do |f|
|
477
|
-
File.delete(Conf.file(f)) if f =~ /^.*\.(md5|site)$/
|
478
|
-
end
|
479
|
-
cache_file = Conf.file('cache.yml')
|
480
|
-
WCC.logger.warn "Removing timestamp cache..."
|
481
|
-
File.delete(cache_file) if File.exists?(cache_file)
|
482
|
-
Prog.exit 1
|
483
|
-
end
|
484
|
-
|
485
|
-
# read filter.d
|
486
|
-
Dir[File.join(Conf[:filter_dir], '*.rb')].each { |file| require file }
|
353
|
+
create_cache_dir
|
354
|
+
clean_cache_dir if Conf[:clean]
|
355
|
+
load_filters
|
356
|
+
load_timestamps
|
487
357
|
|
488
|
-
#
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
yaml = YAML.load_file(cache_file)
|
495
|
-
|
496
|
-
if not yaml
|
497
|
-
WCC.logger.info "No timestamps loaded"
|
498
|
-
else
|
499
|
-
@@timestamps = yaml['timestamps']
|
500
|
-
end
|
501
|
-
else
|
502
|
-
@@timestamps = {}
|
503
|
-
end
|
358
|
+
# stats
|
359
|
+
@@stats = {
|
360
|
+
'nruns' => 1,
|
361
|
+
'nsites' => 0, 'nnotifications' => 0, 'nerrors' => 0,
|
362
|
+
'nlines' => 0, 'nhunks' => 0
|
363
|
+
}
|
504
364
|
|
505
365
|
Conf.sites.each do |site|
|
506
366
|
ts_old = get_timestamp(site)
|
@@ -510,24 +370,22 @@ module WCC
|
|
510
370
|
WCC.logger.info "Skipping check for #{site.uri.host.to_s} due to check #{ts_diff} minute#{ts_diff == 1 ? '' : 's'} ago."
|
511
371
|
next
|
512
372
|
end
|
513
|
-
|
373
|
+
case checkForUpdate(site)
|
374
|
+
when :update
|
514
375
|
WCC.logger.warn "#{site.uri.host.to_s} has an update!"
|
515
|
-
|
376
|
+
when :noupdate
|
516
377
|
WCC.logger.info "#{site.uri.host.to_s} is unchanged"
|
378
|
+
when :error
|
379
|
+
@@stats['nerrors'] += 1
|
517
380
|
end
|
518
381
|
update_timestamp(site, ts_new)
|
519
382
|
end
|
520
383
|
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
# shut down notificators
|
525
|
-
Notificators.mappings.each do |name,klass|
|
526
|
-
WCC.logger.debug "Shut down #{klass}"
|
527
|
-
klass.shut_down
|
528
|
-
end
|
384
|
+
save_timestamps
|
385
|
+
update_stats if Conf[:stats]
|
386
|
+
shut_down_notificators
|
529
387
|
end
|
530
|
-
|
388
|
+
|
531
389
|
# Attempts to read the named template file from template.d
|
532
390
|
# and converts it into ERB.
|
533
391
|
#
|
@@ -543,7 +401,7 @@ module WCC
|
|
543
401
|
end
|
544
402
|
nil
|
545
403
|
end
|
546
|
-
|
404
|
+
|
547
405
|
# Attempts to write the given raw content to the named template file
|
548
406
|
# in template.d. This should be used to create initial template files on demand
|
549
407
|
# and will work only when file does not already exist.
|
@@ -574,5 +432,194 @@ module WCC
|
|
574
432
|
def self.update_timestamp(site, t)
|
575
433
|
@@timestamps[site.uri.to_s] = t
|
576
434
|
end
|
435
|
+
|
436
|
+
def self.create_cache_dir
|
437
|
+
Dir.mkdir(Conf[:cache_dir]) unless File.directory?(Conf[:cache_dir])
|
438
|
+
end
|
439
|
+
|
440
|
+
def self.clean_cache_dir
|
441
|
+
WCC.logger.warn "Removing hash and diff files..."
|
442
|
+
Dir.foreach(Conf[:cache_dir]) do |f|
|
443
|
+
File.delete(Conf.file(f)) if f =~ /^.*\.(md5|site)$/
|
444
|
+
end
|
445
|
+
# special files
|
446
|
+
cache_file = Conf.file('cache.yml')
|
447
|
+
WCC.logger.warn "Removing timestamp cache..."
|
448
|
+
File.delete(cache_file) if File.exists?(cache_file)
|
449
|
+
stats_file = Conf.file('stats.yml')
|
450
|
+
WCC.logger.warn "Removing stats file..."
|
451
|
+
File.delete(stats_file) if File.exists?(stats_file)
|
452
|
+
Prog.exit 1
|
453
|
+
end
|
454
|
+
|
455
|
+
def self.load_filters
|
456
|
+
Dir[File.join(Conf[:filter_dir], '*.rb')].each do |file|
|
457
|
+
require file
|
458
|
+
end
|
459
|
+
end
|
460
|
+
|
461
|
+
def self.load_timestamps
|
462
|
+
cache_file = Conf.file('cache.yml')
|
463
|
+
@@timestamps = {}
|
464
|
+
if File.exists?(cache_file)
|
465
|
+
WCC.logger.debug "Load timestamps from '#{cache_file}'"
|
466
|
+
# may be *false* if file is empty
|
467
|
+
yaml = YAML.load_file(cache_file)
|
468
|
+
if not yaml
|
469
|
+
WCC.logger.warn "No timestamps loaded"
|
470
|
+
else
|
471
|
+
@@timestamps = yaml['timestamps']
|
472
|
+
end
|
473
|
+
end
|
474
|
+
end
|
475
|
+
|
476
|
+
def self.save_timestamps
|
477
|
+
cache_file = Conf.file('cache.yml')
|
478
|
+
File.open(cache_file, 'w+') do |f| YAML.dump({"timestamps" => @@timestamps}, f) end
|
479
|
+
end
|
480
|
+
|
481
|
+
def self.update_stats
|
482
|
+
stats_file = Conf.file('stats.yml')
|
483
|
+
if File.exists?(stats_file)
|
484
|
+
WCC.logger.debug "Load stats from '#{stats_file}'"
|
485
|
+
yaml = YAML.load_file(stats_file)
|
486
|
+
if not yaml
|
487
|
+
WCC.logger.warn "No stats loaded"
|
488
|
+
else
|
489
|
+
# merge stats infos
|
490
|
+
@@stats.each do |k,v| @@stats[k] += yaml['stats'][k] end
|
491
|
+
end
|
492
|
+
end
|
493
|
+
File.open(stats_file, 'w+') do |f| YAML.dump({"stats" => @@stats}, f) end
|
494
|
+
end
|
495
|
+
|
496
|
+
def self.shut_down_notificators
|
497
|
+
Notificators.mappings.each do |name,klass|
|
498
|
+
WCC.logger.debug "Shut down #{klass}"
|
499
|
+
klass.shut_down
|
500
|
+
end
|
501
|
+
end
|
502
|
+
|
503
|
+
def self.checkForUpdate(site)
|
504
|
+
WCC.logger.info "Requesting '#{site.uri.to_s}'"
|
505
|
+
begin
|
506
|
+
res = site.fetch
|
507
|
+
rescue Timeout::Error => ex
|
508
|
+
return :noupdate # don't claim on this
|
509
|
+
rescue => ex
|
510
|
+
WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
|
511
|
+
return :error
|
512
|
+
end
|
513
|
+
return :error if handle_http_errors(res, site)
|
514
|
+
|
515
|
+
new_content = get_utf8_body(res, site)
|
516
|
+
return :error if new_content.nil?
|
517
|
+
|
518
|
+
# strip html
|
519
|
+
new_content = new_content.strip_html if site.strip_html?
|
520
|
+
|
521
|
+
new_hash = Digest::MD5.hexdigest(new_content)
|
522
|
+
WCC.logger.debug "Compare hashes\n old: #{site.hash.to_s}\n new: #{new_hash.to_s}"
|
523
|
+
return :noupdate if new_hash == site.hash
|
524
|
+
|
525
|
+
# do not try diff or anything if site was never checked before
|
526
|
+
if site.new?
|
527
|
+
site.hash, site.content = new_hash, new_content
|
528
|
+
diff = nil # no diff possible
|
529
|
+
else
|
530
|
+
# save old site to tmp file
|
531
|
+
old_site_file = Tempfile.open("wcc-#{site.id}-")
|
532
|
+
old_site_file.write(site.content)
|
533
|
+
old_site_file.close
|
534
|
+
old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + '.md5')).strftime(DIFF_TIME_FMT)
|
535
|
+
new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)
|
536
|
+
# save new site
|
537
|
+
site.hash, site.content = new_hash, new_content
|
538
|
+
# diff between OLD and NEW
|
539
|
+
diff = %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" #{old_site_file.path} #{Conf.file(site.id + '.site')}]
|
540
|
+
end
|
541
|
+
|
542
|
+
# construct the data made available to filters and templates
|
543
|
+
data = OpenStruct.new(:site => site, :tag => Conf[:tag],
|
544
|
+
:diff => diff.nil? ? nil : WCC::Differ.new(diff))
|
545
|
+
|
546
|
+
@@stats['nsites'] += 1
|
547
|
+
if not data.diff.nil?
|
548
|
+
@@stats['nlines'] += data.diff.nlinesc
|
549
|
+
@@stats['nhunks'] += data.diff.nhunks
|
550
|
+
end
|
551
|
+
|
552
|
+
# HACK: there *was* an update but no notification is required
|
553
|
+
return :noupdate if not Filters.accept(data, site.filters)
|
554
|
+
|
555
|
+
site.notify.each do |name|
|
556
|
+
rec = Conf.recipients[name]
|
557
|
+
if rec.nil?
|
558
|
+
WCC.logger.error "Could not notify recipient #{name} - not found!"
|
559
|
+
else
|
560
|
+
@@stats['nnotifications'] += 1
|
561
|
+
rec.each { |way| way.notify!(data) }
|
562
|
+
end
|
563
|
+
end
|
564
|
+
|
565
|
+
:update
|
566
|
+
end
|
567
|
+
|
568
|
+
def self.handle_http_errors(res, site)
|
569
|
+
return false if res.kind_of?(Net::HTTPOK)
|
570
|
+
if res.kind_of?(Net::HTTPMovedPermanently)
|
571
|
+
loc = res['Location']
|
572
|
+
if loc.nil?
|
573
|
+
WCC.logger.error "Site #{site.uri.to_s} moved permanently, skipping it - no new location given."
|
574
|
+
else
|
575
|
+
WCC.logger.error "Site #{site.uri.to_s} moved permanently to '#{loc}', skipping it - please update your conf.yml adequately!"
|
576
|
+
end
|
577
|
+
return true
|
578
|
+
elsif res.kind_of?(Net::HTTPSeeOther) or res.kind_of?(Net::HTTPTemporaryRedirect)
|
579
|
+
loc = URI.parse(res['Location'])
|
580
|
+
WCC.logger.warn "Redirect: requesting '#{loc.to_s}'"
|
581
|
+
res = site.fetch_redirect(loc)
|
582
|
+
if not res.kind_of?(Net::HTTPOK)
|
583
|
+
WCC.logger.error "Redirected site #{loc.to_s} returned #{res.code} code, skipping it."
|
584
|
+
WCC.logger.error "Headers: #{res.to_hash.inspect}"
|
585
|
+
return true
|
586
|
+
end
|
587
|
+
elsif res.kind_of?(Net::HTTPUnauthorized)
|
588
|
+
WCC.logger.error "Site #{site.uri.to_s} demands authentication for '#{res['www-authenticate']}', skipping it - consider using 'auth:' option in your conf.yml."
|
589
|
+
return true
|
590
|
+
elsif res.kind_of?(Net::HTTPNotFound)
|
591
|
+
WCC.logger.error "Site #{site.uri.to_s} not found (404), skipping it."
|
592
|
+
return true
|
593
|
+
elsif res.kind_of?(Net::HTTPForbidden)
|
594
|
+
WCC.logger.error "Site #{site.uri.to_s} forbids access (403), skipping it."
|
595
|
+
return true
|
596
|
+
elsif res.kind_of?(Net::HTTPInternalServerError)
|
597
|
+
WCC.logger.error "Site #{site.uri.to_s} has internal errors (500), skipping it."
|
598
|
+
return true
|
599
|
+
elsif res.kind_of?(Net::HTTPServiceUnavailable)
|
600
|
+
#retry_after = res['Retry-After']
|
601
|
+
WCC.logger.warn "Site #{site.uri.to_s} currently not available (503), skipping it."
|
602
|
+
return true
|
603
|
+
else
|
604
|
+
WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
|
605
|
+
WCC.logger.error "Headers: #{res.to_hash.inspect}"
|
606
|
+
return true
|
607
|
+
end
|
608
|
+
end
|
609
|
+
|
610
|
+
def self.get_utf8_body(res, site)
|
611
|
+
# detect encoding from http header, meta element, default utf-8
|
612
|
+
# do not use utf-8 regex because it will fail on non utf-8 pages
|
613
|
+
encoding = (res['content-type'].to_s.match(/;\s*charset=([A-Za-z0-9-]*)/i).to_a[1] ||
|
614
|
+
res.body.match(/<meta.*charset=([a-zA-Z0-9-]*).*/i).to_a[1]).to_s.downcase || 'utf-8'
|
615
|
+
WCC.logger.info "Encoding is '#{encoding}'"
|
616
|
+
# convert to utf-8
|
617
|
+
begin
|
618
|
+
return Iconv.conv('utf-8', encoding, res.body)
|
619
|
+
rescue => ex
|
620
|
+
WCC.logger.error "Cannot convert site #{site.uri.to_s} from '#{encoding}': #{ex.to_s}"
|
621
|
+
end
|
622
|
+
nil
|
623
|
+
end
|
577
624
|
end
|
578
625
|
end
|
data/lib/wcc/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wcc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 2
|
8
|
-
- 2
|
8
|
+
- 3
|
9
9
|
- 0
|
10
|
-
version: 2.2.0
|
10
|
+
version: 2.3.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Christian Nicolai
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2012-06-30 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: htmlentities
|