wcc 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/assets/conf.yml +1 -0
- data/lib/wcc.rb +214 -167
- data/lib/wcc/version.rb +1 -1
- metadata +4 -4
data/assets/conf.yml
CHANGED
data/lib/wcc.rb
CHANGED
@@ -100,6 +100,7 @@ module WCC
|
|
100
100
|
:simulate => false,
|
101
101
|
:clean => false,
|
102
102
|
:nomails => false,
|
103
|
+
:stats => false,
|
103
104
|
# when you want to use ./tmp it must be writeable
|
104
105
|
:cache_dir => '/var/tmp/wcc',
|
105
106
|
:tag => 'wcc',
|
@@ -175,6 +176,8 @@ module WCC
|
|
175
176
|
@options[:filter_dir] ||= val
|
176
177
|
when 'templated'
|
177
178
|
@options[:template_dir] ||= val
|
179
|
+
when 'stats'
|
180
|
+
@options[:stats] ||= val
|
178
181
|
else
|
179
182
|
if not Notificators.mappings.include?(key)
|
180
183
|
plugin_name = "wcc-#{key}-notificator"
|
@@ -341,166 +344,23 @@ module WCC
|
|
341
344
|
end
|
342
345
|
|
343
346
|
class Prog
|
344
|
-
def self.checkForUpdate(site)
|
345
|
-
WCC.logger.info "Requesting '#{site.uri.to_s}'"
|
346
|
-
begin
|
347
|
-
res = site.fetch
|
348
|
-
rescue Timeout::Error => ex
|
349
|
-
# don't claim on this
|
350
|
-
return false
|
351
|
-
rescue => ex
|
352
|
-
WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
|
353
|
-
return false
|
354
|
-
end
|
355
|
-
if res.kind_of?(Net::HTTPOK)
|
356
|
-
# be happy!
|
357
|
-
elsif res.kind_of?(Net::HTTPMovedPermanently)
|
358
|
-
loc = res['Location']
|
359
|
-
if loc.nil?
|
360
|
-
WCC.logger.error "Site #{site.uri.to_s} moved permanently, skippong it - no new location given."
|
361
|
-
else
|
362
|
-
WCC.logger.error "Site #{site.uri.to_s} moved permanently to '#{loc}', skipping it - please update your conf.yml adequately!"
|
363
|
-
end
|
364
|
-
return false
|
365
|
-
elsif res.kind_of?(Net::HTTPSeeOther) or res.kind_of?(Net::HTTPTemporaryRedirect)
|
366
|
-
loc = URI.parse(res['Location'])
|
367
|
-
WCC.logger.warn "Redirect: requesting '#{loc.to_s}'"
|
368
|
-
res = site.fetch_redirect(loc)
|
369
|
-
if not res.kind_of?(Net::HTTPOK)
|
370
|
-
WCC.logger.error "Redirected site #{loc.to_s} returned #{res.code} code, skipping it."
|
371
|
-
WCC.logger.error "Headers: #{res.to_hash.inspect}"
|
372
|
-
return false
|
373
|
-
end
|
374
|
-
elsif res.kind_of?(Net::HTTPUnauthorized)
|
375
|
-
WCC.logger.error "Site #{site.uri.to_s} demands authentication for '#{res['www-authenticate']}', skipping it - consider using 'auth:' option in your conf.yml."
|
376
|
-
return false
|
377
|
-
elsif res.kind_of?(Net::HTTPNotFound)
|
378
|
-
WCC.logger.error "Site #{site.uri.to_s} not found, skipping it."
|
379
|
-
return false
|
380
|
-
elsif res.kind_of?(Net::HTTPForbidden)
|
381
|
-
WCC.logger.error "Site #{site.uri.to_s} forbids access, skipping it."
|
382
|
-
return false
|
383
|
-
elsif res.kind_of?(Net::HTTPInternalServerError)
|
384
|
-
WCC.logger.error "Site #{site.uri.to_s} has internal errors, skipping it."
|
385
|
-
return false
|
386
|
-
elsif res.kind_of?(Net::HTTPServiceUnavailable)
|
387
|
-
#retry_after = res['Retry-After']
|
388
|
-
WCC.logger.warn "Site #{site.uri.to_s} currently not available, skipping it."
|
389
|
-
return false
|
390
|
-
else
|
391
|
-
WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
|
392
|
-
WCC.logger.error "Headers: #{res.to_hash.inspect}"
|
393
|
-
return false
|
394
|
-
end
|
395
|
-
|
396
|
-
new_content = res.body
|
397
|
-
|
398
|
-
# detect encoding from http header, meta element, default utf-8
|
399
|
-
# do not use utf-8 regex because it will fail on non utf-8 pages
|
400
|
-
encoding = (res['content-type'].to_s.match(/;\s*charset=([A-Za-z0-9-]*)/i).to_a[1] ||
|
401
|
-
new_content.match(/<meta.*charset=([a-zA-Z0-9-]*).*/i).to_a[1]).to_s.downcase || 'utf-8'
|
402
|
-
|
403
|
-
WCC.logger.info "Encoding is '#{encoding}'"
|
404
|
-
|
405
|
-
# convert to utf-8
|
406
|
-
begin
|
407
|
-
new_content = Iconv.conv('utf-8', encoding, new_content)
|
408
|
-
rescue => ex
|
409
|
-
WCC.logger.error "Cannot convert site from '#{encoding}': #{ex.to_s}"
|
410
|
-
return false
|
411
|
-
end
|
412
|
-
|
413
|
-
# strip html
|
414
|
-
new_content = new_content.strip_html if site.strip_html?
|
415
|
-
new_hash = Digest::MD5.hexdigest(new_content)
|
416
|
-
|
417
|
-
WCC.logger.debug "Compare hashes\n old: #{site.hash.to_s}\n new: #{new_hash.to_s}"
|
418
|
-
return false if new_hash == site.hash
|
419
|
-
|
420
|
-
# do not try diff or anything if site was never checked before
|
421
|
-
if site.new?
|
422
|
-
site.hash, site.content = new_hash, new_content
|
423
|
-
|
424
|
-
# signal that no diff was posible
|
425
|
-
diff = nil
|
426
|
-
else
|
427
|
-
# save old site to tmp file
|
428
|
-
old_site_file = Tempfile.new("wcc-#{site.id}-")
|
429
|
-
old_site_file.write(site.content)
|
430
|
-
old_site_file.close
|
431
|
-
|
432
|
-
# calculate labels before updating
|
433
|
-
old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + ".md5")).strftime(DIFF_TIME_FMT)
|
434
|
-
new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)
|
435
|
-
|
436
|
-
site.hash, site.content = new_hash, new_content
|
437
|
-
|
438
|
-
# diff between OLD and NEW
|
439
|
-
diff = %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" #{old_site_file.path} #{Conf.file(site.id + '.site')}]
|
440
|
-
end
|
441
|
-
|
442
|
-
# construct the data made available to filters and templates
|
443
|
-
data = OpenStruct.new
|
444
|
-
data.site = site
|
445
|
-
data.diff = diff.nil? ? nil : WCC::Differ.new(diff)
|
446
|
-
data.tag = Conf[:tag]
|
447
|
-
|
448
|
-
# HACK: there *was* an update but no notification is required
|
449
|
-
return false if not Filters.accept(data, site.filters)
|
450
|
-
|
451
|
-
site.notify.each do |name|
|
452
|
-
rec = Conf.recipients[name]
|
453
|
-
if rec.nil?
|
454
|
-
WCC.logger.error "Could not notify recipient #{name} - not found!"
|
455
|
-
else
|
456
|
-
rec.each { |way| way.notify!(data) }
|
457
|
-
end
|
458
|
-
end
|
459
|
-
|
460
|
-
true
|
461
|
-
end
|
462
|
-
|
463
|
-
# main
|
464
347
|
def self.run!
|
465
|
-
# first use of Conf initializes it
|
466
|
-
WCC.logger = Logger.new(STDOUT)
|
467
|
-
|
468
348
|
# make sure logger is correctly configured
|
349
|
+
WCC.logger = Logger.new(STDOUT)
|
350
|
+
# first use of Conf initializes it
|
469
351
|
Conf.instance
|
470
352
|
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
WCC.logger.warn "Removing hash and diff files..."
|
476
|
-
Dir.foreach(Conf[:cache_dir]) do |f|
|
477
|
-
File.delete(Conf.file(f)) if f =~ /^.*\.(md5|site)$/
|
478
|
-
end
|
479
|
-
cache_file = Conf.file('cache.yml')
|
480
|
-
WCC.logger.warn "Removing timestamp cache..."
|
481
|
-
File.delete(cache_file) if File.exists?(cache_file)
|
482
|
-
Prog.exit 1
|
483
|
-
end
|
484
|
-
|
485
|
-
# read filter.d
|
486
|
-
Dir[File.join(Conf[:filter_dir], '*.rb')].each { |file| require file }
|
353
|
+
create_cache_dir
|
354
|
+
clean_cache_dir if Conf[:clean]
|
355
|
+
load_filters
|
356
|
+
load_timestamps
|
487
357
|
|
488
|
-
#
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
yaml = YAML.load_file(cache_file)
|
495
|
-
|
496
|
-
if not yaml
|
497
|
-
WCC.logger.info "No timestamps loaded"
|
498
|
-
else
|
499
|
-
@@timestamps = yaml['timestamps']
|
500
|
-
end
|
501
|
-
else
|
502
|
-
@@timestamps = {}
|
503
|
-
end
|
358
|
+
# stats
|
359
|
+
@@stats = {
|
360
|
+
'nruns' => 1,
|
361
|
+
'nsites' => 0, 'nnotifications' => 0, 'nerrors' => 0,
|
362
|
+
'nlines' => 0, 'nhunks' => 0
|
363
|
+
}
|
504
364
|
|
505
365
|
Conf.sites.each do |site|
|
506
366
|
ts_old = get_timestamp(site)
|
@@ -510,24 +370,22 @@ module WCC
|
|
510
370
|
WCC.logger.info "Skipping check for #{site.uri.host.to_s} due to check #{ts_diff} minute#{ts_diff == 1 ? '' : 's'} ago."
|
511
371
|
next
|
512
372
|
end
|
513
|
-
|
373
|
+
case checkForUpdate(site)
|
374
|
+
when :update
|
514
375
|
WCC.logger.warn "#{site.uri.host.to_s} has an update!"
|
515
|
-
|
376
|
+
when :noupdate
|
516
377
|
WCC.logger.info "#{site.uri.host.to_s} is unchanged"
|
378
|
+
when :error
|
379
|
+
@@stats['nerrors'] += 1
|
517
380
|
end
|
518
381
|
update_timestamp(site, ts_new)
|
519
382
|
end
|
520
383
|
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
# shut down notificators
|
525
|
-
Notificators.mappings.each do |name,klass|
|
526
|
-
WCC.logger.debug "Shut down #{klass}"
|
527
|
-
klass.shut_down
|
528
|
-
end
|
384
|
+
save_timestamps
|
385
|
+
update_stats if Conf[:stats]
|
386
|
+
shut_down_notificators
|
529
387
|
end
|
530
|
-
|
388
|
+
|
531
389
|
# Attempts to read the named template file from template.d
|
532
390
|
# and converts it into ERB.
|
533
391
|
#
|
@@ -543,7 +401,7 @@ module WCC
|
|
543
401
|
end
|
544
402
|
nil
|
545
403
|
end
|
546
|
-
|
404
|
+
|
547
405
|
# Attempts to write the given raw content to the named template file
|
548
406
|
# in template.d. This should be used to create initial template files on demand
|
549
407
|
# and will work only when file does not already exist.
|
@@ -574,5 +432,194 @@ module WCC
|
|
574
432
|
def self.update_timestamp(site, t)
|
575
433
|
@@timestamps[site.uri.to_s] = t
|
576
434
|
end
|
435
|
+
|
436
|
+
def self.create_cache_dir
|
437
|
+
Dir.mkdir(Conf[:cache_dir]) unless File.directory?(Conf[:cache_dir])
|
438
|
+
end
|
439
|
+
|
440
|
+
def self.clean_cache_dir
|
441
|
+
WCC.logger.warn "Removing hash and diff files..."
|
442
|
+
Dir.foreach(Conf[:cache_dir]) do |f|
|
443
|
+
File.delete(Conf.file(f)) if f =~ /^.*\.(md5|site)$/
|
444
|
+
end
|
445
|
+
# special files
|
446
|
+
cache_file = Conf.file('cache.yml')
|
447
|
+
WCC.logger.warn "Removing timestamp cache..."
|
448
|
+
File.delete(cache_file) if File.exists?(cache_file)
|
449
|
+
stats_file = Conf.file('stats.yml')
|
450
|
+
WCC.logger.warn "Removing stats file..."
|
451
|
+
File.delete(stats_file) if File.exists?(stats_file)
|
452
|
+
Prog.exit 1
|
453
|
+
end
|
454
|
+
|
455
|
+
def self.load_filters
|
456
|
+
Dir[File.join(Conf[:filter_dir], '*.rb')].each do |file|
|
457
|
+
require file
|
458
|
+
end
|
459
|
+
end
|
460
|
+
|
461
|
+
def self.load_timestamps
|
462
|
+
cache_file = Conf.file('cache.yml')
|
463
|
+
@@timestamps = {}
|
464
|
+
if File.exists?(cache_file)
|
465
|
+
WCC.logger.debug "Load timestamps from '#{cache_file}'"
|
466
|
+
# may be *false* if file is empty
|
467
|
+
yaml = YAML.load_file(cache_file)
|
468
|
+
if not yaml
|
469
|
+
WCC.logger.warn "No timestamps loaded"
|
470
|
+
else
|
471
|
+
@@timestamps = yaml['timestamps']
|
472
|
+
end
|
473
|
+
end
|
474
|
+
end
|
475
|
+
|
476
|
+
def self.save_timestamps
|
477
|
+
cache_file = Conf.file('cache.yml')
|
478
|
+
File.open(cache_file, 'w+') do |f| YAML.dump({"timestamps" => @@timestamps}, f) end
|
479
|
+
end
|
480
|
+
|
481
|
+
def self.update_stats
|
482
|
+
stats_file = Conf.file('stats.yml')
|
483
|
+
if File.exists?(stats_file)
|
484
|
+
WCC.logger.debug "Load stats from '#{stats_file}'"
|
485
|
+
yaml = YAML.load_file(stats_file)
|
486
|
+
if not yaml
|
487
|
+
WCC.logger.warn "No stats loaded"
|
488
|
+
else
|
489
|
+
# merge stats infos
|
490
|
+
@@stats.each do |k,v| @@stats[k] += yaml['stats'][k] end
|
491
|
+
end
|
492
|
+
end
|
493
|
+
File.open(stats_file, 'w+') do |f| YAML.dump({"stats" => @@stats}, f) end
|
494
|
+
end
|
495
|
+
|
496
|
+
def self.shut_down_notificators
|
497
|
+
Notificators.mappings.each do |name,klass|
|
498
|
+
WCC.logger.debug "Shut down #{klass}"
|
499
|
+
klass.shut_down
|
500
|
+
end
|
501
|
+
end
|
502
|
+
|
503
|
+
def self.checkForUpdate(site)
|
504
|
+
WCC.logger.info "Requesting '#{site.uri.to_s}'"
|
505
|
+
begin
|
506
|
+
res = site.fetch
|
507
|
+
rescue Timeout::Error => ex
|
508
|
+
return :noupdate # don't claim on this
|
509
|
+
rescue => ex
|
510
|
+
WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
|
511
|
+
return :error
|
512
|
+
end
|
513
|
+
return :error if handle_http_errors(res, site)
|
514
|
+
|
515
|
+
new_content = get_utf8_body(res, site)
|
516
|
+
return :error if new_content.nil?
|
517
|
+
|
518
|
+
# strip html
|
519
|
+
new_content = new_content.strip_html if site.strip_html?
|
520
|
+
|
521
|
+
new_hash = Digest::MD5.hexdigest(new_content)
|
522
|
+
WCC.logger.debug "Compare hashes\n old: #{site.hash.to_s}\n new: #{new_hash.to_s}"
|
523
|
+
return :noupdate if new_hash == site.hash
|
524
|
+
|
525
|
+
# do not try diff or anything if site was never checked before
|
526
|
+
if site.new?
|
527
|
+
site.hash, site.content = new_hash, new_content
|
528
|
+
diff = nil # no diff possible
|
529
|
+
else
|
530
|
+
# save old site to tmp file
|
531
|
+
old_site_file = Tempfile.open("wcc-#{site.id}-")
|
532
|
+
old_site_file.write(site.content)
|
533
|
+
old_site_file.close
|
534
|
+
old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + '.md5')).strftime(DIFF_TIME_FMT)
|
535
|
+
new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)
|
536
|
+
# save new site
|
537
|
+
site.hash, site.content = new_hash, new_content
|
538
|
+
# diff between OLD and NEW
|
539
|
+
diff = %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" #{old_site_file.path} #{Conf.file(site.id + '.site')}]
|
540
|
+
end
|
541
|
+
|
542
|
+
# construct the data made available to filters and templates
|
543
|
+
data = OpenStruct.new(:site => site, :tag => Conf[:tag],
|
544
|
+
:diff => diff.nil? ? nil : WCC::Differ.new(diff))
|
545
|
+
|
546
|
+
@@stats['nsites'] += 1
|
547
|
+
if not data.diff.nil?
|
548
|
+
@@stats['nlines'] += data.diff.nlinesc
|
549
|
+
@@stats['nhunks'] += data.diff.nhunks
|
550
|
+
end
|
551
|
+
|
552
|
+
# HACK: there *was* an update but no notification is required
|
553
|
+
return :noupdate if not Filters.accept(data, site.filters)
|
554
|
+
|
555
|
+
site.notify.each do |name|
|
556
|
+
rec = Conf.recipients[name]
|
557
|
+
if rec.nil?
|
558
|
+
WCC.logger.error "Could not notify recipient #{name} - not found!"
|
559
|
+
else
|
560
|
+
@@stats['nnotifications'] += 1
|
561
|
+
rec.each { |way| way.notify!(data) }
|
562
|
+
end
|
563
|
+
end
|
564
|
+
|
565
|
+
:update
|
566
|
+
end
|
567
|
+
|
568
|
+
def self.handle_http_errors(res, site)
|
569
|
+
return false if res.kind_of?(Net::HTTPOK)
|
570
|
+
if res.kind_of?(Net::HTTPMovedPermanently)
|
571
|
+
loc = res['Location']
|
572
|
+
if loc.nil?
|
573
|
+
WCC.logger.error "Site #{site.uri.to_s} moved permanently, skipping it - no new location given."
|
574
|
+
else
|
575
|
+
WCC.logger.error "Site #{site.uri.to_s} moved permanently to '#{loc}', skipping it - please update your conf.yml adequately!"
|
576
|
+
end
|
577
|
+
return true
|
578
|
+
elsif res.kind_of?(Net::HTTPSeeOther) or res.kind_of?(Net::HTTPTemporaryRedirect)
|
579
|
+
loc = URI.parse(res['Location'])
|
580
|
+
WCC.logger.warn "Redirect: requesting '#{loc.to_s}'"
|
581
|
+
res = site.fetch_redirect(loc)
|
582
|
+
if not res.kind_of?(Net::HTTPOK)
|
583
|
+
WCC.logger.error "Redirected site #{loc.to_s} returned #{res.code} code, skipping it."
|
584
|
+
WCC.logger.error "Headers: #{res.to_hash.inspect}"
|
585
|
+
return true
|
586
|
+
end
|
587
|
+
elsif res.kind_of?(Net::HTTPUnauthorized)
|
588
|
+
WCC.logger.error "Site #{site.uri.to_s} demands authentication for '#{res['www-authenticate']}', skipping it - consider using 'auth:' option in your conf.yml."
|
589
|
+
return true
|
590
|
+
elsif res.kind_of?(Net::HTTPNotFound)
|
591
|
+
WCC.logger.error "Site #{site.uri.to_s} not found (404), skipping it."
|
592
|
+
return true
|
593
|
+
elsif res.kind_of?(Net::HTTPForbidden)
|
594
|
+
WCC.logger.error "Site #{site.uri.to_s} forbids access (403), skipping it."
|
595
|
+
return true
|
596
|
+
elsif res.kind_of?(Net::HTTPInternalServerError)
|
597
|
+
WCC.logger.error "Site #{site.uri.to_s} has internal errors (500), skipping it."
|
598
|
+
return true
|
599
|
+
elsif res.kind_of?(Net::HTTPServiceUnavailable)
|
600
|
+
#retry_after = res['Retry-After']
|
601
|
+
WCC.logger.warn "Site #{site.uri.to_s} currently not available (503), skipping it."
|
602
|
+
return true
|
603
|
+
else
|
604
|
+
WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
|
605
|
+
WCC.logger.error "Headers: #{res.to_hash.inspect}"
|
606
|
+
return true
|
607
|
+
end
|
608
|
+
end
|
609
|
+
|
610
|
+
def self.get_utf8_body(res, site)
|
611
|
+
# detect encoding from http header, meta element, default utf-8
|
612
|
+
# do not use utf-8 regex because it will fail on non utf-8 pages
|
613
|
+
encoding = (res['content-type'].to_s.match(/;\s*charset=([A-Za-z0-9-]*)/i).to_a[1] ||
|
614
|
+
res.body.match(/<meta.*charset=([a-zA-Z0-9-]*).*/i).to_a[1]).to_s.downcase || 'utf-8'
|
615
|
+
WCC.logger.info "Encoding is '#{encoding}'"
|
616
|
+
# convert to utf-8
|
617
|
+
begin
|
618
|
+
return Iconv.conv('utf-8', encoding, res.body)
|
619
|
+
rescue => ex
|
620
|
+
WCC.logger.error "Cannot convert site #{site.uri.to_s} from '#{encoding}': #{ex.to_s}"
|
621
|
+
end
|
622
|
+
nil
|
623
|
+
end
|
577
624
|
end
|
578
625
|
end
|
data/lib/wcc/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wcc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 3
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 2
|
8
|
-
- 2
|
8
|
+
- 3
|
9
9
|
- 0
|
10
|
-
version: 2.2.0
|
10
|
+
version: 2.3.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Christian Nicolai
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2012-06-30 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: htmlentities
|