wcc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/bin/wcc +6 -0
  2. data/lib/wcc.rb +508 -0
  3. metadata +79 -0
data/bin/wcc ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/ruby -KuW0
2
+ # encoding: UTF-8
3
+
4
+ require 'wcc'
5
+
6
# Start the checker: WCC::Prog.run! is the program's main routine.
+ WCC::Prog.run!
@@ -0,0 +1,508 @@
1
+
2
+ require 'base64'
3
+ require 'digest/md5'
4
+ require 'iconv'
5
+ require 'logger'
6
+ require 'net/http'
7
+ require 'net/https'
8
+ require 'net/smtp'
9
+ require 'optparse'
10
+ require 'pathname'
11
+ require 'singleton'
12
+ require 'tempfile'
13
+ require 'uri'
14
+ require 'yaml'
15
+
16
+ # ruby gem dependencies
17
+ require 'htmlentities'
18
+
19
class String
  # Returns a copy of the receiver without markup: every <...> tag that has at
  # least one character between the brackets is replaced by a single space and
  # the result is passed through HTMLEntities#decode.
  def strip_html
    without_tags = gsub(/<[^>]+>/, ' ')
    decoder = HTMLEntities.new
    decoder.decode(without_tags)
  end
end
26
+
27
+ module WCC
28
+
29
+ DIFF_TIME_FMT = '%Y-%m-%d %H:%M:%S %Z'
30
+
31
+ # logging via WCC.logger.blub
32
class << self
  # Shared logger, used throughout as WCC.logger.<level>.
  # Reads as nil until a logger has been assigned.
  attr_accessor :logger
end
39
+
40
# Program configuration, built once (Singleton) from three sources:
# command line options, the 'conf' section of the YAML config file and the
# built-in defaults. Command line values win over YAML values, which win over
# the defaults (exception: a YAML 'email/smtp' section always selects the
# 'smtp' mailer).
#
# The class-level helpers (Conf[:key], Conf.file, Conf.sites, ...) go through
# Conf.instance and therefore must not be used while #initialize is running.
class Conf
  include Singleton

  # Hash-like read access: an explicitly set option wins, otherwise the default.
  # NOTE: a stored false counts as "not set" and falls back to the default.
  def [](key)
    @options[key.to_sym] || Conf.default[key.to_sym]
  end

  # Hash-like write access; nil values are ignored so they never hide a default.
  def []=(key, val)
    @options[key.to_sym] = val unless val.nil?
  end

  # Built-in default values for all options that have one.
  def self.default
    @default_conf ||= {
      :verbose => false,
      :debug => false,
      :simulate => false,
      :clean => false,
      :nomails => false,
      :dir => '/var/tmp/wcc',
      :tag => 'wcc',
      :syslog => false,
      :filterd => './filter.d',
      :mailer => 'smtp',
      :smtp_host => 'localhost',
      :smtp_port => 25
    }
  end

  # Parses ARGV, configures WCC.logger, loads the YAML config and prepares the
  # cache directory. Exits the process when the config file is missing, when no
  # sender address is known, or after handling --help / --show-config.
  def initialize
    @options = {}

    parse_command_line
    configure_logger

    # main
    WCC.logger.info "No config file given, using default 'conf.yml' file" if ARGV.length == 0

    self[:conf] = ARGV[0] || 'conf.yml'

    unless File.exist?(self[:conf])
      WCC.logger.fatal "Config file '#{self[:conf]}' does not exist!"
      exit 1
    end

    WCC.logger.debug "Load config from '#{self[:conf]}'"
    load_yaml_options

    if self[:from_mail].to_s.empty?
      WCC.logger.fatal "No sender mail address given! See help."
      exit 1
    end

    if self[:show_config]
      Conf.default.merge(@options).each_key do |k|
        puts " #{k.to_s} => #{self[k]}"
      end
      exit 0
    end

    # create dir for hash files
    Dir.mkdir(self[:dir]) unless File.directory?(self[:dir])

    remove_cache_files if self[:clean]

    # read filter.d
    Dir[File.join(self[:filterd], '*.rb')].each { |file| require file }
  end

  # Returns the list of Site objects described by the 'sites' section of the
  # config file. The list is built on first use and cached afterwards.
  def self.sites
    return @sites unless @sites.nil?

    @sites = []

    WCC.logger.debug "Load sites from '#{Conf[:conf]}'"

    # may be false if file is empty
    yaml = YAML.load_file(Conf[:conf])
    entries = yaml ? yaml['sites'].to_a : []

    entries.each do |yaml_site|
      filterrefs = (yaml_site['filters'] || []).map do |entry|
        if entry.is_a?(Hash)
          # hash containing only one key (filter id),
          # the value is the argument hash
          id = entry.keys[0]
          FilterRef.new(id, entry[id])
        else
          # plain filter id without arguments
          FilterRef.new(entry, {})
        end
      end

      cookie = nil
      cookie = File.read(yaml_site['cookie']) unless yaml_site['cookie'].nil?

      # default to stripping HTML, but honour an explicit 'strip_html: false'
      strip_html = yaml_site['strip_html']
      strip_html = true if strip_html.nil?

      # a missing 'emails' entry means "no recipients" instead of a crash
      emails = Array(yaml_site['emails']).map { |m| MailAddress.new(m) }

      @sites << Site.new(
        yaml_site['url'],
        strip_html,
        emails,
        filterrefs,
        yaml_site['auth'] || {},
        cookie)
    end

    WCC.logger.debug @sites.length.to_s + (@sites.length == 1 ? ' site' : ' sites') + " loaded\n" +
      @sites.map { |s| " #{s.uri.host.to_s}\n url: #{s.uri.to_s}\n id: #{s.id}" }.join("\n")

    @sites
  end

  # Returns the mailer selected by the :mailer option (only 'smtp' is known),
  # creating it on first use; nil for any other mailer name.
  def self.mailer
    if @mailer.nil?
      # smtp mailer
      if Conf[:mailer] == 'smtp'
        @mailer = SmtpMailer.new(Conf[:smtp_host], Conf[:smtp_port])
      end
    end
    @mailer
  end

  # Path of +path+ inside the cache directory; without argument the cache
  # directory itself.
  def self.file(path = nil)
    path.nil? ? self[:dir] : File.join(self[:dir], path)
  end

  def self.simulate?; self[:simulate] end
  def self.send_mails?; !self[:nomails] end
  def self.[](key); Conf.instance[key] end

  private

  # Fills @options from ARGV; option arguments are removed from ARGV so that
  # only the optional config file name remains.
  def parse_command_line
    OptionParser.new do |opts|
      opts.banner = "Usage: ruby wcc.rb [options] [config-yaml-file]"
      opts.on('-v', '--verbose', 'Output more information') do self[:verbose] = true end
      opts.on('-d', '--debug', 'Enable debug mode') do self[:debug] = true end
      opts.on('-o', '--dir DIR', 'Save required files to DIR') do |dir| self[:dir] = dir end
      opts.on('-s', '--simulate', 'Check for update but does not save any data') do self[:simulate] = true end
      opts.on('-c', '--clean', 'Removes all hash and diff files') do self[:clean] = true end
      opts.on('-t', '--tag TAG', 'Sets TAG used in output') do |t| self[:tag] = t end
      opts.on('-n', '--no-mails', 'Does not send any emails') do self[:nomails] = true end
      opts.on('-f', '--from MAIL', 'Set sender mail address') do |m| self[:from_mail] = m end
      # stored under the keys the mailer actually reads (:smtp_host/:smtp_port)
      opts.on('--host HOST', 'Sets SMTP host') do |h| self[:smtp_host] = h end
      opts.on('--port PORT', Integer, 'Sets SMTP port') do |p| self[:smtp_port] = p end
      opts.on('--show-config', 'Show config after loading config file.') do self[:show_config] = true end
      opts.on('-h', '-?', '--help', 'Display this screen') do
        puts opts
        exit
      end
    end.parse!
  end

  # Applies program name, log level and formatter to WCC.logger.
  def configure_logger
    WCC.logger.progname = 'wcc'

    # latest flag overrides everything
    WCC.logger.level = Logger::ERROR
    WCC.logger.level = Logger::INFO if self[:verbose]
    WCC.logger.level = Logger::DEBUG if self[:debug]

    WCC.logger.formatter = MyFormatter.new(self[:verbose] || self[:debug])
  end

  # Copies values from the 'conf' section of the YAML file into @options
  # wherever the command line did not already provide one.
  def load_yaml_options
    # may be false if file is empty
    yaml = YAML.load_file(self[:conf])
    return unless yaml.is_a?(Hash)

    yaml = yaml['conf']
    return unless yaml.is_a?(Hash)

    @options[:from_mail] ||= yaml['from_addr']
    @options[:dir] ||= yaml['cache_dir']
    @options[:tag] ||= yaml['tag']
    @options[:syslog] ||= yaml['use_syslog']
    @options[:filterd] ||= yaml['filterd']

    return unless yaml['email'].is_a?(Hash) && yaml['email']['smtp'].is_a?(Hash)

    smtp = yaml['email']['smtp']
    @options[:mailer] = 'smtp'
    @options[:smtp_host] ||= smtp['host']
    # yaml parser should provide an integer here
    @options[:smtp_port] ||= smtp['port']
  end

  # Deletes all hash (*.md5) and content (*.site) files from the cache dir.
  # Builds the paths itself because Conf.file needs the finished instance.
  def remove_cache_files
    WCC.logger.warn "Cleanup hash and diff files"
    Dir.foreach(self[:dir]) do |f|
      File.delete(File.join(self[:dir], f)) if f =~ /\.(md5|site)\z/
    end
  end
end
213
+
214
# Reference to a registered filter: the filter id plus the arguments that are
# handed to the filter when it runs.
class FilterRef
  def initialize(id, arguments)
    @id, @arguments = id, arguments
  end

  # Identifier under which the filter was registered.
  def id
    @id
  end

  # Arguments passed to the filter.
  def arguments
    @arguments
  end

  # A reference prints as its filter id.
  alias_method :to_s, :id
end
224
+
225
# One watched web page together with its cached state. The state lives in two
# files in the cache directory, both named after the site id (the first eight
# hex digits of the MD5 of the URL): '<id>.md5' holds the hash of the last
# seen content, '<id>.site' the content itself.
#
# NOTE: #hash intentionally returns the stored content hash (a String) and so
# shadows Object#hash; do not use Site objects as Hash keys.
class Site
  attr_reader :uri, :emails, :filters, :auth, :cookie, :id

  # url        - address of the page (String)
  # strip_html - whether HTML is stripped before hashing and diffing
  # emails     - one recipient or an Array of recipients
  # filters    - one filter reference or an Array of them
  # auth       - auth settings; 'type', 'username' and 'password' are read by the fetcher
  # cookie     - value for the Cookie request header, or nil
  def initialize(url, strip_html, emails, filters, auth, cookie)
    @uri = URI.parse(url)
    @strip_html = strip_html
    @emails = emails.is_a?(Array) ? emails : [emails]
    @filters = filters.is_a?(Array) ? filters : [filters]
    @auth = auth
    @cookie = cookie
    @id = Digest::MD5.hexdigest(url.to_s)[0...8]
    # invalid hashes are ""
    load_hash
  end

  def strip_html?; @strip_html end

  # True when no hash is stored yet, i.e. the site was never checked.
  def new?
    hash.empty?
  end

  # Reads the stored hash from the cache; a missing or empty hash file
  # yields "" so that the site counts as new.
  def load_hash
    file = Conf.file(@id + '.md5')
    if File.exist?(file)
      WCC.logger.debug "Load hash from file '#{file}'"
      first_line = File.open(file, 'r') { |f| f.gets }
      # gets returns nil for an empty file and keeps a trailing newline
      @hash = first_line.to_s.strip
    else
      WCC.logger.info "Site #{uri.host} was never checked before."
      @hash = ""
    end
  end

  # Reads the stored content from the cache; leaves @content untouched when
  # there is no content file.
  def load_content
    file = Conf.file(@id + '.site')
    @content = File.read(file) if File.exist?(file)
  end

  def hash; @hash end

  # Remembers the new hash and persists it unless running in simulate mode.
  def hash=(hash)
    @hash = hash
    File.open(Conf.file(@id + '.md5'), 'w') { |f| f.write(@hash) } unless Conf.simulate?
  end

  # Cached content, loaded from disk on first access (nil if never stored).
  def content; load_content if @content.nil?; @content end

  # Remembers the new content and persists it unless running in simulate mode.
  def content=(content)
    @content = content
    File.open(Conf.file(@id + '.site'), 'w') { |f| f.write(@content) } unless Conf.simulate?
  end
end
278
+
279
# A mail address given either as bare address ("user@host") or together with
# a display name ("Some Name <user@host>").
class MailAddress
  # Accepts a String or another MailAddress; surrounding whitespace is dropped.
  def initialize(email)
    source = email.is_a?(MailAddress) ? email.to_s : email
    @email = source.strip
  end

  # Display name if one was given, otherwise everything before the last "@".
  def name
    return @email.gsub(/<.+?>/, '').strip if with_display_name?

    pieces = @email.split("@")
    pieces[0...-1].join("@")
  end

  # The bare address, i.e. the part between the angle brackets if present.
  def address
    return @email unless with_display_name?

    @email.match(/<([^>]+@[^@>]+)>/)[1]
  end

  def to_s; @email end

  private

  # True for the "Some Name <user@host>" form.
  def with_display_name?
    !(@email =~ /^[\w\s]+<.+@[^@]+>$/).nil?
  end
end
303
+
304
# A notification mail: subject (title), body (message) and sender.
class Mail
  attr_reader :title, :message

  # The sender defaults to the configured :from_mail address and can be
  # replaced with options[:from].
  def initialize(title, message, options = {})
    @title, @message = title, message

    sender = MailAddress.new(Conf[:from_mail])
    sender = MailAddress.new(options[:from]) unless options[:from].nil?
    @options = {:from => sender}
  end

  # Hands this mail to the configured mailer for delivery to all +tos+.
  # NOTE: shadows Object#send for Mail instances.
  def send(tos = [])
    mailer = Conf.mailer
    mailer.send(self, @options[:from], tos)
  end
end
318
+
319
# Delivers mails through an SMTP server.
class SmtpMailer
  def initialize(host, port)
    @host, @port = host, port
  end

  # Sends +mail+ from +from+ to every address in +to+, one message per
  # recipient, over a single SMTP session. The body is transferred base64
  # encoded. Any error is logged as fatal and not raised again.
  def send(mail, from, to = [])
    Net::SMTP.start(@host, @port) do |smtp|
      to.each do |toaddr|
        header_lines = [
          "From: #{from.name} <#{from.address}>",
          "To: #{toaddr}",
          # collapse whitespace so the subject always stays on one line
          "Subject: #{mail.title.gsub(/\s+/, ' ')}",
          "Content-Type: text/plain; charset=\"utf-8\"",
          "Content-Transfer-Encoding: base64"
        ]
        # headers, one empty line, then the encoded body
        msg = header_lines.join("\n") + "\n\n" + Base64.encode64(mail.message)

        smtp.send_message(msg, from.address, toaddr.address)
      end
    end
  rescue => ex
    WCC.logger.fatal "Cannot send mails at #{@host}:#{@port} : #{ex.to_s}"
  end
end
343
+
344
# Registry of named filters. A filter is a block that receives the data to
# inspect plus an argument hash and answers whether the data is acceptable.
class Filter
  @@filters = {}

  # Registers +block+ under +id+, replacing any filter with the same id.
  def self.add(id, &block)
    WCC.logger.info "Adding filter '#{id}'"
    @@filters[id] = block
  end

  # Runs +data+ through all referenced filters in order and stops at the
  # first one that rejects it. Unknown filters are reported and skipped.
  # Returns true when no filter rejected the data (or +filters+ is nil).
  def self.accept(data, filters)
    return true if filters.nil?

    WCC.logger.info "Testing with filters: #{filters.join(', ')}"

    filters.all? do |ref|
      handler = @@filters[ref.id]

      if handler.nil?
        WCC.logger.error "Requested filter '#{ref.id}' not found, skipping it."
        true
      elsif handler.call(data, ref.arguments)
        true
      else
        WCC.logger.info "Filter #{ref.id} failed!"
        false
      end
    end
  end
end
373
+
374
# Log formatter producing "LEVEL: message" lines; with colors enabled the
# FATAL, ERROR and WARN lines are wrapped in ANSI color sequences.
class MyFormatter
  def initialize(use_color = true)
    @color = use_color
  end

  def white;   ansi(37) end
  def cyan;    ansi(36) end
  def magenta; ansi(35) end
  def blue;    ansi(34) end
  def yellow;  ansi(33) end
  def green;   ansi(32) end
  def red;     ansi(31) end
  def black;   ansi(30) end
  def rst;     "\e[0m" end

  # Called by Logger for every message; +time+ and +progname+ are not used.
  def call(lvl, time, progname, msg)
    line = format("%s: %s", lvl, msg.to_s)
    tint = @color ? level_color(lvl) : nil
    return "#{line}\n" if tint.nil?

    "#{tint}#{line}#{rst}\n"
  end

  private

  # Escape sequence selecting the bold variant of the given color code.
  def ansi(code)
    "\e[1;#{code}m"
  end

  # Color for levels that get highlighted, nil for all others.
  def level_color(lvl)
    case lvl
    when "FATAL" then magenta
    when "ERROR" then red
    when "WARN"  then yellow
    end
  end
end
399
+
400
# The program itself: fetches every configured site, detects changes against
# the cached state and notifies about them.
class Prog

  # TODO: move to Site
  # Performs a GET request for the site, using basic auth and a Cookie header
  # when the site defines them, and returns the response object.
  def self.fetch(site)
    http = Net::HTTP.new(site.uri.host, site.uri.port)
    if site.uri.is_a?(URI::HTTPS)
      http.use_ssl = true
      # NOTE(review): certificate verification is switched off, so HTTPS
      # sites are not protected against man-in-the-middle attacks.
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end
    http.start do |conn|
      req = Net::HTTP::Get.new(site.uri.request_uri)
      if site.auth['type'] == 'basic'
        WCC.logger.debug "Doing basic auth"
        req.basic_auth(site.auth['username'], site.auth['password'])
      end
      req.add_field("Cookie", site.cookie) unless site.cookie.nil?
      conn.request(req)
    end
  end

  # Determines the (lower-cased) character encoding of a page: first from the
  # charset parameter of the Content-Type header value, then from a charset
  # given inside a <meta> element of the body, finally defaulting to 'utf-8'.
  # Quoted charset values are accepted, empty ones are ignored.
  def self.detect_encoding(content_type, body)
    # do not use utf-8 regex because it will fail on non utf-8 pages
    candidates = [
      [content_type.to_s, /;\s*charset=["']?([A-Za-z0-9_-]+)/i],
      [body.to_s, /<meta[^>]*charset=["']?([A-Za-z0-9_-]+)/i]
    ]
    candidates.each do |text, pattern|
      found = text.match(pattern)
      return found[1].downcase if found
    end
    'utf-8'
  end

  # Checks one site. Returns true when a change was detected and accepted by
  # the site's filters (a mail is sent and syslog is written if enabled);
  # returns false when the site is unchanged, could not be fetched or
  # converted, or the change was rejected by a filter.
  def self.checkForUpdate(site)
    WCC.logger.info "Requesting '#{site.uri.to_s}'"
    begin
      res = fetch(site)
    rescue StandardError, Timeout::Error => ex
      WCC.logger.error "Cannot connect to #{site.uri.to_s} : #{ex.to_s}"
      return false
    end
    if not res.kind_of?(Net::HTTPOK)
      WCC.logger.error "Site #{site.uri.to_s} returned #{res.code} code, skipping it."
      return false
    end

    new_content = res.body

    # detect encoding from http header, meta element, default utf-8
    encoding = detect_encoding(res['content-type'], new_content)

    WCC.logger.info "Encoding is '#{encoding}'"

    # convert to utf-8
    begin
      new_content = Iconv.conv('utf-8', encoding, new_content)
    rescue => ex
      WCC.logger.error "Cannot convert site from '#{encoding}': #{ex.to_s}"
      return false
    end

    # strip html _before_ diffing
    new_content = new_content.strip_html if site.strip_html?
    new_hash = Digest::MD5.hexdigest(new_content)

    WCC.logger.debug "Compare hashes\n old: #{site.hash.to_s}\n new: #{new_hash.to_s}"
    return false if new_hash == site.hash

    # do not try diff or anything if site was never checked before
    if site.new?
      # update content
      site.hash, site.content = new_hash, new_content

      # set custom diff message
      diff = "Site was first checked so no diff was possible."
    else
      # save old site to tmp file
      old_site_file = Tempfile.new("wcc-#{site.id}-")
      begin
        old_site_file.write(site.content)
        old_site_file.close

        # calculate labels before updating
        old_label = "OLD (%s)" % File.mtime(Conf.file(site.id + ".md5")).strftime(DIFF_TIME_FMT)
        new_label = "NEW (%s)" % Time.now.strftime(DIFF_TIME_FMT)

        # do update
        site.hash, site.content = new_hash, new_content

        # diff between OLD and NEW (paths quoted so that spaces do not split them)
        diff = %x[diff -U 1 --label "#{old_label}" --label "#{new_label}" "#{old_site_file.path}" "#{Conf.file(site.id + '.site')}"]
      ensure
        # remove the temp file right away instead of waiting for the finalizer
        old_site_file.unlink
      end
    end

    return false if not Filter.accept(diff, site.filters)

    Mail.new(
      "[#{Conf[:tag]}] #{site.uri.host} changed",
      "Change at #{site.uri.to_s} - diff follows:\n\n#{diff}"
    ).send(site.emails) if Conf.send_mails?

    if Conf[:syslog]
      # argument list form: no shell involved, so tag and URL cannot be
      # misinterpreted as shell syntax
      system('logger', '-t', Conf[:tag].to_s, "Change at #{site.uri.to_s} (tag #{site.id}) detected")
    end

    true
  end

  # main
  # Sets up logging to STDOUT and checks all configured sites in turn.
  def self.run!
    WCC.logger = Logger.new(STDOUT)

    Conf.sites.each do |site|
      if checkForUpdate(site)
        WCC.logger.warn "#{site.uri.host.to_s} has an update!"
      else
        WCC.logger.info "#{site.uri.host.to_s} is unchanged"
      end
    end
  end
end
508
+ end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wcc
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Christian Nicolai
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-09-30 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: htmlentities
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ description: wcc tracks changes of websites and notifies you by email.
35
+ email: chrnicolai@gmail.com
36
+ executables:
37
+ - wcc
38
+ extensions: []
39
+
40
+ extra_rdoc_files: []
41
+
42
+ files:
43
+ - bin/wcc
44
+ - lib/wcc.rb
45
+ homepage: https://github.com/cmur2/wcc
46
+ licenses: []
47
+
48
+ post_install_message:
49
+ rdoc_options: []
50
+
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ hash: 3
59
+ segments:
60
+ - 0
61
+ version: "0"
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ hash: 3
68
+ segments:
69
+ - 0
70
+ version: "0"
71
+ requirements: []
72
+
73
+ rubyforge_project: wcc
74
+ rubygems_version: 1.8.6
75
+ signing_key:
76
+ specification_version: 3
77
+ summary: web change checker
78
+ test_files: []
79
+