wcc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/bin/wcc +6 -0
  2. data/lib/wcc.rb +508 -0
  3. metadata +79 -0
data/bin/wcc ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/ruby -KuW0
2
+ # encoding: UTF-8
3
+
4
+ require 'wcc'
5
+
6
+ WCC::Prog.run!
@@ -0,0 +1,508 @@
1
+
2
+ require 'base64'
3
+ require 'digest/md5'
4
+ require 'iconv'
5
+ require 'logger'
6
+ require 'net/http'
7
+ require 'net/https'
8
+ require 'net/smtp'
9
+ require 'optparse'
10
+ require 'pathname'
11
+ require 'singleton'
12
+ require 'tempfile'
13
+ require 'uri'
14
+ require 'yaml'
15
+
16
+ # ruby gem dependencies
17
+ require 'htmlentities'
18
+
19
# Core extension used by wcc to reduce a fetched page to its text.
class String
  # Returns a copy of the string in which every HTML tag ("<", one or more
  # characters other than ">", then ">") is replaced by a single space and
  # the result is passed through HTMLEntities#decode.
  def strip_html
    without_tags = gsub(/<[^>]+>/, ' ')
    decoder = HTMLEntities.new
    decoder.decode(without_tags)
  end
end
26
+
27
+ module WCC
28
+
29
+ DIFF_TIME_FMT = '%Y-%m-%d %H:%M:%S %Z'
30
+
31
+ # logging via WCC.logger.blub
32
class << self
  # Shared logger, used throughout as WCC.logger.<level>. It is nil until
  # something assigns one through WCC.logger=.
  attr_accessor :logger
end
39
+
40
# Process-wide configuration of wcc.
#
# The single instance merges three sources, highest priority first: command
# line options, the "conf" section of the YAML config file, and
# Conf.default. The class-level helpers (Conf[], Conf.sites, Conf.mailer,
# Conf.file, ...) go through Conf.instance, so the first call to any of
# them parses ARGV and loads the config file.
class Conf
  include Singleton

  # Looks up an option by name (String or Symbol). Falls back to
  # Conf.default when the option is unset; a stored +false+ also falls
  # through to the default.
  def [](key)
    @options[key.to_sym] || Conf.default[key.to_sym]
  end

  # Stores an option. +nil+ values are ignored so they never shadow a
  # default.
  def []=(key, val)
    @options[key.to_sym] = val unless val.nil?
  end

  # Built-in defaults used when neither the command line nor the config
  # file provides a value.
  def self.default
    @default_conf ||= {
      :verbose => false,
      :debug => false,
      :simulate => false,
      :clean => false,
      :nomails => false,
      :dir => '/var/tmp/wcc',
      :tag => 'wcc',
      :syslog => false,
      :filterd => './filter.d',
      :mailer => 'smtp',
      :smtp_host => 'localhost',
      :smtp_port => 25
    }
  end

  # Parses ARGV, configures WCC.logger, loads the YAML config file, prepares
  # the cache directory and requires every filter script in the filter
  # directory. Exits the process when the config file is missing, when no
  # sender address is configured, or after --help / --show-config.
  #
  # Only the instance-level self[] is used here: the class-level Conf[]
  # would re-enter Conf.instance while it is still being built.
  def initialize
    @options = {}

    OptionParser.new do |opts|
      opts.banner = "Usage: ruby wcc.rb [options] [config-yaml-file]"
      opts.on('-v', '--verbose', 'Output more information') { self[:verbose] = true }
      opts.on('-d', '--debug', 'Enable debug mode') { self[:debug] = true }
      opts.on('-o', '--dir DIR', 'Save required files to DIR') { |dir| self[:dir] = dir }
      opts.on('-s', '--simulate', 'Check for update but does not save any data') { self[:simulate] = true }
      opts.on('-c', '--clean', 'Removes all hash and diff files') { self[:clean] = true }
      opts.on('-t', '--tag TAG', 'Sets TAG used in output') { |t| self[:tag] = t }
      opts.on('-n', '--no-mails', 'Does not send any emails') { self[:nomails] = true }
      opts.on('-f', '--from MAIL', 'Set sender mail address') { |m| self[:from_mail] = m }
      # Stored under the keys Conf.mailer actually reads (:smtp_host and
      # :smtp_port); OptionParser coerces the port to an Integer.
      opts.on('--host HOST', 'Sets SMTP host') { |h| self[:smtp_host] = h }
      opts.on('--port PORT', Integer, 'Sets SMTP port') { |p| self[:smtp_port] = p }
      opts.on('--show-config', 'Show config after loading config file.') { self[:show_config] = true }
      opts.on('-h', '-?', '--help', 'Display this screen') do
        puts opts
        exit
      end
    end.parse!

    WCC.logger.progname = 'wcc'

    # latest flag overrides everything
    WCC.logger.level = Logger::ERROR
    WCC.logger.level = Logger::INFO if self[:verbose]
    WCC.logger.level = Logger::DEBUG if self[:debug]

    WCC.logger.formatter = MyFormatter.new(self[:verbose] || self[:debug])

    # parse! removed the options, so the first remaining argument (if any)
    # is the config file
    WCC.logger.info "No config file given, using default 'conf.yml' file" if ARGV.length == 0

    self[:conf] = ARGV[0] || 'conf.yml'

    unless File.exist?(self[:conf])
      WCC.logger.fatal "Config file '#{self[:conf]}' does not exist!"
      exit 1
    end

    WCC.logger.debug "Load config from '#{self[:conf]}'"

    # may be false if file is empty
    yaml = YAML.load_file(self[:conf])
    section = yaml.is_a?(Hash) ? yaml['conf'] : nil
    if section.is_a?(Hash)
      # ||= keeps values already given on the command line
      @options[:from_mail] ||= section['from_addr']
      @options[:dir] ||= section['cache_dir']
      @options[:tag] ||= section['tag']
      @options[:syslog] ||= section['use_syslog']
      @options[:filterd] ||= section['filterd']

      smtp = section['email'].is_a?(Hash) ? section['email']['smtp'] : nil
      if smtp.is_a?(Hash)
        @options[:mailer] = 'smtp'
        @options[:smtp_host] ||= smtp['host']
        # yaml parser should provide an integer here
        @options[:smtp_port] ||= smtp['port']
      end
    end

    if self[:from_mail].to_s.empty?
      WCC.logger.fatal "No sender mail address given! See help."
      exit 1
    end

    if self[:show_config]
      Conf.default.merge(@options).each_key do |k|
        puts " #{k.to_s} => #{self[k]}"
      end
      exit 0
    end

    # create dir for hash files
    Dir.mkdir(self[:dir]) unless File.directory?(self[:dir])

    if self[:clean]
      WCC.logger.warn "Cleanup hash and diff files"
      Dir.foreach(self[:dir]) do |f|
        # Conf.file is a class method and would re-enter Conf.instance, so
        # the path is built directly
        File.delete(File.join(self[:dir], f)) if f =~ /^.*\.(md5|site)$/
      end
    end

    # read filter.d
    Dir[File.join(self[:filterd], '*.rb')].each { |file| require file }
  end

  # Returns the Site objects described by the "sites" list of the config
  # file. The list is built on the first call and cached afterwards.
  def self.sites
    return @sites unless @sites.nil?

    @sites = []

    WCC.logger.debug "Load sites from '#{Conf[:conf]}'"

    # may be false if file is empty
    yaml = YAML.load_file(Conf[:conf])
    entries = yaml.is_a?(Hash) ? yaml['sites'].to_a : []

    entries.each do |yaml_site|
      cookie = yaml_site['cookie'].nil? ? nil : File.read(yaml_site['cookie'])

      # stripping is on unless the config explicitly says otherwise
      strip = yaml_site['strip_html']
      strip = true if strip.nil?

      @sites << Site.new(
        yaml_site['url'],
        strip,
        Array(yaml_site['emails']).map { |m| MailAddress.new(m) },
        build_filter_refs(yaml_site['filters']),
        yaml_site['auth'] || {},
        cookie)
    end

    WCC.logger.debug @sites.length.to_s + (@sites.length == 1 ? ' site' : ' sites') + " loaded\n" +
      @sites.map { |s| " #{s.uri.host.to_s}\n url: #{s.uri.to_s}\n id: #{s.id}" }.join("\n")

    @sites
  end

  # Converts the "filters" list of one site entry into FilterRef objects.
  # An entry is either a plain filter id (String) or a one-key Hash mapping
  # the filter id to its argument hash; anything else is skipped with a
  # warning.
  def self.build_filter_refs(entries)
    refs = []
    (entries || []).each do |entry|
      if entry.is_a?(Hash)
        # hash containing only one key (filter id),
        # the value is the argument hash
        id = entry.keys[0]
        refs << FilterRef.new(id, entry[id])
      elsif entry.is_a?(String)
        refs << FilterRef.new(entry, {})
      else
        WCC.logger.warn "Ignoring invalid filter entry #{entry.inspect}"
      end
    end
    refs
  end
  private_class_method :build_filter_refs

  # Returns the mailer selected by the :mailer option, created on first use.
  # Only 'smtp' is known; any other value yields nil.
  def self.mailer
    if @mailer.nil?
      # smtp mailer
      if Conf[:mailer] == 'smtp'
        @mailer = SmtpMailer.new(Conf[:smtp_host], Conf[:smtp_port])
      end
    end
    @mailer
  end

  # Path of +path+ inside the cache directory, or the cache directory itself
  # when called without an argument.
  def self.file(path = nil)
    path.nil? ? self[:dir] : File.join(self[:dir], path)
  end

  # True when nothing should be written to the cache directory.
  def self.simulate?; self[:simulate] end

  # False when --no-mails was given.
  def self.send_mails?; !self[:nomails] end

  # Class-level shortcut for Conf.instance[key].
  def self.[](key); Conf.instance[key] end
end
213
+
214
# Reference from a site entry to a registered filter: the filter's id plus
# the argument hash that is handed to the filter block.
class FilterRef
  attr_reader :id, :arguments

  # id::        key under which the filter was registered
  # arguments:: hash passed as second argument to the filter block
  def initialize(id, arguments)
    @id = id
    @arguments = arguments
  end

  # Always a String, so that string-conversion contexts such as Array#join
  # render the id itself even when it is a Symbol or a number.
  def to_s
    @id.to_s
  end
end
224
+
225
# One watched web page together with its cached state.
#
# The state lives in two files in the cache directory, both named after the
# site id (the first 8 hex digits of the MD5 of the URL): "<id>.md5" holds
# the hash of the last seen content, "<id>.site" the content itself.
#
# NOTE(review): #hash and #hash= shadow Object#hash, so instances should not
# be used as Hash keys.
class Site
  attr_reader :uri, :emails, :filters, :auth, :cookie, :id

  # url::        address of the page
  # strip_html:: whether the content is reduced to text before comparing
  # emails::     one recipient or an Array of recipients
  # filters::    one filter reference or an Array of them
  # auth::       hash with authentication settings
  # cookie::     value for the Cookie request header, or nil
  def initialize(url, strip_html, emails, filters, auth, cookie)
    @uri = URI.parse(url)
    @strip_html = strip_html
    @emails = emails.is_a?(Array) ? emails : [emails]
    @filters = filters.is_a?(Array) ? filters : [filters]
    @auth = auth
    @cookie = cookie
    @id = Digest::MD5.hexdigest(url.to_s)[0...8]
    # invalid hashes are ""
    load_hash
  end

  def strip_html?; @strip_html end

  # True when no hash is stored yet, i.e. the site was never checked.
  def new?
    hash.empty?
  end

  # Reads the stored hash from "<id>.md5". A missing or empty file results
  # in "", and a trailing line break is removed so that a hand-edited file
  # still compares equal.
  def load_hash
    file = Conf.file(@id + '.md5')
    if File.exist?(file)
      WCC.logger.debug "Load hash from file '#{file}'"
      # gets returns nil for an empty file, hence the to_s
      @hash = File.open(file, 'r') { |f| f.gets }.to_s.chomp
    else
      WCC.logger.info "Site #{uri.host} was never checked before."
      @hash = ""
    end
  end

  # Reads the stored content from "<id>.site" if that file exists.
  def load_content
    file = Conf.file(@id + '.site')
    @content = File.read(file) if File.exist?(file)
  end

  def hash; @hash end

  # Updates the hash and, unless simulating, writes it to "<id>.md5".
  def hash=(hash)
    @hash = hash
    File.open(Conf.file(@id + '.md5'), 'w') { |f| f.write(@hash) } unless Conf.simulate?
  end

  # Content of the last check, loaded from disk on first access (nil when
  # nothing is stored).
  def content
    load_content if @content.nil?
    @content
  end

  # Updates the content and, unless simulating, writes it to "<id>.site".
  def content=(content)
    @content = content
    File.open(Conf.file(@id + '.site'), 'w') { |f| f.write(@content) } unless Conf.simulate?
  end
end
278
+
279
# A mail address in one of two forms: a bare "local@domain" or
# "Display Name <local@domain>".
class MailAddress
  # Matches the "Display Name <local@domain>" form over the whole string.
  # Capture 1 is the (possibly empty) display name, which may contain any
  # character except angle brackets; capture 2 is the bare address.
  NAMED_ADDRESS = /\A([^<>]*)<([^<>]+@[^<>@]+)>\z/

  # email:: a String or another MailAddress; surrounding whitespace is
  #         removed and nil is treated as an empty address.
  def initialize(email)
    @email = email.to_s.strip
  end

  # The display name for the named form, otherwise everything before the
  # last "@" of the bare address.
  def name
    named = NAMED_ADDRESS.match(@email)
    if named
      named[1].strip
    else
      @email.split("@")[0...-1].join("@")
    end
  end

  # The bare address: the part inside the angle brackets for the named
  # form, otherwise the whole string.
  def address
    named = NAMED_ADDRESS.match(@email)
    named ? named[2] : @email
  end

  def to_s; @email end
end
303
+
304
# A notification (title and message text) together with its sender.
class Mail
  attr_reader :title, :message

  # title::   subject of the mail
  # message:: body text
  # options:: :from overrides the sender, which otherwise is
  #           Conf[:from_mail]; either value is wrapped in a MailAddress
  def initialize(title, message, options = {})
    @title = title
    @message = message
    default_sender = MailAddress.new(Conf[:from_mail])
    custom_sender = options[:from]
    @options = {
      :from => (custom_sender.nil? ? default_sender : MailAddress.new(custom_sender))
    }
  end

  # Hands this mail, its sender and the recipients +tos+ to Conf.mailer and
  # returns whatever the mailer returns.
  def send(tos = [])
    sender = @options[:from]
    Conf.mailer.send(self, sender, tos)
  end
end
318
+
319
# Delivers mails through an SMTP server.
class SmtpMailer
  # host:: SMTP server name
  # port:: SMTP server port
  def initialize(host, port)
    @host = host
    @port = port
  end

  # Opens one SMTP session and sends +mail+ separately to every entry of
  # +to+. +from+ must respond to name and address, each recipient to
  # address. The body is transferred base64 encoded. Any StandardError is
  # logged as fatal instead of being raised.
  def send(mail, from, to = [])
    Net::SMTP.start(@host, @port) do |connection|
      to.each do |recipient|
        header_lines = [
          "From: #{from.name} <#{from.address}>",
          "To: #{recipient}",
          # collapse whitespace so the subject stays on one header line
          "Subject: #{mail.title.gsub(/\s+/, ' ')}",
          "Content-Type: text/plain; charset=\"utf-8\"",
          "Content-Transfer-Encoding: base64",
          ""
        ]
        # the trailing "" closes the last header line; the extra "\n" is the
        # blank line separating headers from body
        text = header_lines.join("\n") + "\n" + Base64.encode64(mail.message)

        connection.send_message(text, from.address, recipient.address)
      end
    end
  rescue => error
    WCC.logger.fatal "Cannot send mails at #{@host}:#{@port} : #{error.to_s}"
  end
end
343
+
344
# Registry of named filter blocks that decide whether a detected change is
# worth reporting.
class Filter
  @@filters = {}

  # Registers +block+ under +id+, replacing any block stored there before.
  def self.add(id, &block)
    WCC.logger.info "Adding filter '#{id}'"
    @@filters[id] = block
  end

  # Runs +data+ through the referenced filters in order. Each registered
  # block is called with the data and the reference's arguments. Returns
  # false as soon as one block returns a falsy value, true otherwise. A nil
  # filter list is accepted at once; references to unknown filters are
  # logged and skipped.
  def self.accept(data, filters)
    return true if filters.nil?

    WCC.logger.info "Testing with filters: #{filters.join(', ')}"

    filters.all? do |ref|
      handler = @@filters[ref.id]

      if handler.nil?
        WCC.logger.error "Requested filter '#{ref.id}' not found, skipping it."
        true
      elsif handler.call(data, ref.arguments)
        true
      else
        WCC.logger.info "Filter #{ref.id} failed!"
        false
      end
    end
  end
end
373
+
374
# Log formatter that prints "LEVEL: message" and can highlight the more
# severe levels with ANSI colors.
class MyFormatter
  # use_color:: when truthy, FATAL, ERROR and WARN lines are colored
  def initialize(use_color = true)
    @color = use_color
  end

  # ANSI escape sequences for bold colors; rst resets all attributes.
  def white
    "\e[1;37m"
  end

  def cyan
    "\e[1;36m"
  end

  def magenta
    "\e[1;35m"
  end

  def blue
    "\e[1;34m"
  end

  def yellow
    "\e[1;33m"
  end

  def green
    "\e[1;32m"
  end

  def red
    "\e[1;31m"
  end

  def black
    "\e[1;30m"
  end

  def rst
    "\e[0m"
  end

  # Formats one log record. time and progname are accepted but not used.
  # Returns the line including its trailing newline.
  def call(lvl, time, progname, msg)
    line = "#{lvl}: #{msg.to_s}"

    paint = nil
    if @color
      if lvl == "FATAL"
        paint = magenta
      elsif lvl == "ERROR"
        paint = red
      elsif lvl == "WARN"
        paint = yellow
      end
    end

    paint.nil? ? "#{line}\n" : "#{paint}#{line}#{rst}\n"
  end
end
399
+
400
# Main program: fetches every configured site, detects changes and sends
# the notifications.
class Prog

  # TODO: move to Site
  #
  # Performs a GET request for the site's URI and returns the response.
  # Basic authentication is used when site.auth['type'] is 'basic', and the
  # site's cookie (if any) is sent as Cookie header.
  def self.fetch(site)
    http = Net::HTTP.new(site.uri.host, site.uri.port)
    if site.uri.is_a?(URI::HTTPS)
      http.use_ssl = true
      # NOTE(review): certificate verification is switched off, so HTTPS
      # sites are fetched without authenticating the server. Left unchanged
      # because sites with self-signed certificates may rely on it.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end
    http.start do |conn|
      req = Net::HTTP::Get.new(site.uri.request_uri)
      if site.auth['type'] == 'basic'
        WCC.logger.debug "Doing basic auth"
        req.basic_auth(site.auth['username'], site.auth['password'])
      end
      if not site.cookie.nil?
        req.add_field("Cookie", site.cookie)
      end
      conn.request(req)
    end
  end

  # Checks one site for changes.
  #
  # Fetches the page, converts it to UTF-8, optionally strips the HTML and
  # compares the MD5 of the result with the stored hash. On a change the
  # stored hash and content are updated, a diff is built (a fixed text on
  # the very first check), the site's filters are consulted and, if they
  # accept, a mail is sent and optionally a syslog entry is written.
  #
  # Returns true when a change was detected and accepted by the filters,
  # false otherwise (including fetch and conversion errors).
  def self.checkForUpdate(site)
    WCC.logger.info "Requesting '#{site.uri.to_s}'"
    begin
      res = fetch(site)
    rescue StandardError, Timeout::Error => ex
      WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
      return false
    end
    if not res.kind_of?(Net::HTTPOK)
      WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
      return false
    end

    # a response without body is treated as an empty page
    new_content = res.body.to_s

    encoding = detect_encoding(res['content-type'], new_content)

    WCC.logger.info "Encoding is '#{encoding}'"

    # convert to utf-8
    begin
      new_content = Iconv.conv('utf-8', encoding, new_content)
    rescue => ex
      WCC.logger.error "Cannot convert site from '#{encoding}': #{ex.to_s}"
      return false
    end

    # strip html _before_ diffing
    new_content = new_content.strip_html if site.strip_html?
    new_hash = Digest::MD5.hexdigest(new_content)

    WCC.logger.debug "Compare hashes\n old: #{site.hash.to_s}\n new: #{new_hash.to_s}"
    return false if new_hash == site.hash

    # do not try diff or anything if site was never checked before
    if site.new?
      # update content
      site.hash, site.content = new_hash, new_content

      # set custom diff message
      diff = "Site was first checked so no diff was possible."
    else
      # remember the old state before updating
      old_content = site.content
      old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + ".md5")).strftime(DIFF_TIME_FMT)
      new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)

      # do update
      site.hash, site.content = new_hash, new_content

      # diff between OLD and NEW
      diff = unified_diff(site.id, old_content, new_content, old_label, new_label)
    end

    return false if not Filter.accept(diff, site.filters)

    Mail.new(
      "[#{Conf[:tag]}] #{site.uri.host} changed",
      "Change at #{site.uri.to_s} - diff follows:\n\n#{diff}"
    ).send(site.emails) if Conf.send_mails?

    if Conf[:syslog]
      # argument list instead of a command string: no shell is involved, so
      # quotes in the tag or URL cannot break or extend the command
      system('logger', '-t', Conf[:tag].to_s, "Change at #{site.uri.to_s} (tag #{site.id}) detected")
    end

    true
  end

  # Determines the character set of a page: first from the charset
  # parameter of the Content-Type header, then from a charset declaration
  # inside a <meta> tag (optionally quoted), and finally 'utf-8'. The name
  # is returned in lower case.
  # do not use utf-8 regex because it will fail on non utf-8 pages
  def self.detect_encoding(content_type, body)
    charset = content_type.to_s[/;\s*charset=["']?([A-Za-z0-9_-]+)/i, 1] ||
              body[/<meta[^>]*charset=["']?([A-Za-z0-9_-]+)/i, 1] ||
              'utf-8'
    charset.downcase
  end
  private_class_method :detect_encoding

  # Returns the output of `diff -U 1` between two texts, labelled with the
  # given labels. Both texts go through temporary files, so the result does
  # not depend on what is stored in the cache directory (nothing is stored
  # there in simulate mode). The temporary files are removed afterwards.
  def self.unified_diff(site_id, old_text, new_text, old_label, new_label)
    old_file = Tempfile.new("wcc-#{site_id}-old-")
    new_file = Tempfile.new("wcc-#{site_id}-new-")
    begin
      old_file.write(old_text.to_s)
      old_file.close
      new_file.write(new_text.to_s)
      new_file.close

      %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" "#{old_file.path}" "#{new_file.path}"]
    ensure
      old_file.close!
      new_file.close!
    end
  end
  private_class_method :unified_diff

  # main
  #
  # Installs a logger writing to STDOUT and checks every configured site.
  def self.run!
    WCC.logger = Logger.new(STDOUT)

    Conf.sites.each do |site|
      if checkForUpdate(site)
        WCC.logger.warn "#{site.uri.host.to_s} has an update!"
      else
        WCC.logger.info "#{site.uri.host.to_s} is unchanged"
      end
    end
  end
end
508
+ end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wcc
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Christian Nicolai
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-09-30 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: htmlentities
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ description: wcc tracks changes of websites and notifies you by email.
35
+ email: chrnicolai@gmail.com
36
+ executables:
37
+ - wcc
38
+ extensions: []
39
+
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - bin/wcc
44
+ - lib/wcc.rb
45
+ homepage: https://github.com/cmur2/wcc
46
+ licenses: []
47
+
48
+ post_install_message:
49
+ rdoc_options: []
50
+
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ hash: 3
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ hash: 3
68
+ segments:
69
+ - 0
70
+ version: "0"
71
+ requirements: []
72
+
73
+ rubyforge_project: wcc
74
+ rubygems_version: 1.8.6
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: web change checker
78
+ test_files: []
79
+