sitediff 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,6 +5,6 @@ if $0 == __FILE__
5
5
  $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
6
6
  end
7
7
 
8
- require 'sitediff'
8
+ require 'sitediff/cli'
9
9
 
10
10
  SiteDiff::Cli.start
@@ -1,11 +1,10 @@
1
1
  #!/bin/env ruby
2
- require 'sitediff/cli.rb'
3
- require 'sitediff/config.rb'
4
- require 'sitediff/result.rb'
5
- require 'sitediff/uriwrapper'
6
- require 'sitediff/util/cache'
7
- require 'typhoeus'
2
+ require 'sitediff/config'
3
+ require 'sitediff/fetch'
4
+ require 'sitediff/result'
5
+ require 'pathname'
8
6
  require 'rainbow'
7
+ require 'yaml'
9
8
 
10
9
  class SiteDiff
11
10
  # path to misc. static files (e.g. erb, css files)
@@ -14,20 +13,28 @@ class SiteDiff
14
13
  # subdirectory containing all failing diffs
15
14
  DIFFS_DIR = 'diffs'
16
15
 
16
+ # files in output
17
+ FAILURES_FILE = 'failures.txt'
18
+ REPORT_FILE = 'report.html'
19
+ SETTINGS_FILE = 'settings.yaml'
20
+
17
21
  # label will be colorized and str will not be.
18
22
  # type dictates the color: can be :info, :diff_success, :diff_failure, :warn, or :error
19
- def self.log(str, type=nil, label=nil)
23
+ def self.log(str, type=:info, label=nil)
20
24
  label = label ? "[sitediff] #{label}" : '[sitediff]'
21
25
  bg = fg = nil
22
26
  case type
23
- when :success
27
+ when :info
28
+ when :diff_success
24
29
  bg = :green
25
30
  fg = :black
26
- when :failure
31
+ when :diff_failure
27
32
  bg = :red
28
- when :error
33
+ when :warn
29
34
  bg = :yellow
30
35
  fg = :black
36
+ when :error
37
+ bg = :red
31
38
  end
32
39
  label = Rainbow(label)
33
40
  label = label.bg(bg) if bg
@@ -43,88 +50,97 @@ class SiteDiff
43
50
  @config.after['url']
44
51
  end
45
52
 
46
- def cache=(file)
47
- # FIXME: Non-global cache would be nice
48
- return unless file
49
- if Gem::Version.new(Typhoeus::VERSION) >= Gem::Version.new('0.6.4')
50
- Typhoeus::Config.cache = SiteDiff::Util::Cache.new(file)
51
- else
52
- # Bug, see: https://github.com/typhoeus/typhoeus/pull/296
53
- SiteDiff::log("Cache unsupported on Typhoeus version < 0.6.4", :failure)
53
+ def initialize(config, cache, verbose=true)
54
+ @cache = cache
55
+ @verbose = verbose
56
+
57
+ # Check for single-site mode
58
+ validate_opts = {}
59
+ if !config.before['url'] && @cache.tag?(:before)
60
+ raise SiteDiffException,
61
+ "A cached 'before' is required for single-site mode" \
62
+ unless @cache.read_tags.include?(:before)
63
+ validate_opts[:need_before] = false
54
64
  end
55
- end
65
+ config.validate(validate_opts)
56
66
 
57
- def initialize(config, cache)
58
- config.validate
59
67
  @config = config
60
- self.cache = cache
61
68
  end
62
69
 
63
- # Sanitize an HTML string based on configuration for either before or after
64
- def sanitize(html, pos)
65
- Sanitize::sanitize(html, @config.send(pos))
70
+ # Sanitize HTML
71
# Run the sanitizer over both fetched versions of one path.
#
# path         - the path being compared; handed to the Sanitizer as :path
# read_results - hash with :before and :after entries, each responding
#                to #content
#
# Returns a two-element array: [sanitized before, sanitized after].
def sanitize(path, read_results)
  [:before, :after].map do |side|
    # @config.before / @config.after hold the per-side sanitization config.
    Sanitizer.new(read_results[side].content,
                  @config.send(side),
                  :path => path).sanitize
  end
end
67
78
 
68
- # Queues fetching before and after URLs with a Typhoeus::Hydra instance
69
- #
70
- # Upon completion of both before and after, prints and saves the diff to
71
- # @results.
72
- def queue_read(hydra, path)
73
- # ( :before | after ) => ReadResult object
74
- reads = {}
75
- [:before, :after].each do |pos|
76
- uri = UriWrapper.new(send(pos) + path)
77
-
78
- uri.queue(hydra) do |res|
79
- reads[pos] = res
80
- next unless reads.size == 2
81
-
82
- # we have read both before and after; calculate diff
83
- if error = reads[:before].error || reads[:after].error
84
- diff = Result.new(path, nil, nil, error)
85
- else
86
- diff = Result.new(path, sanitize(reads[:before].content, :before),
87
- sanitize(reads[:after].content,:after), nil)
88
- end
89
- diff.log
90
- @results[path] = diff
91
- end
79
+ # Process a set of read results
80
# Turn the pair of read results for one path into a Result, store it in
# @results, then log every result that is now ready in original path order.
#
# path         - the path that was fetched
# read_results - hash with :before and :after entries, each responding
#                to #error and #content
def process_results(path, read_results)
  failure = read_results[:before].error || read_results[:after].error
  outcome =
    if failure
      # Either side failing to load means there is nothing to compare.
      Result.new(path, nil, nil, failure)
    else
      Result.new(path, *sanitize(path, read_results), nil)
    end
  @results[path] = outcome

  # Print results in order! Results can arrive in any order, so only log
  # from the head of @ordered while its result is already available.
  loop do
    ready = @results[@ordered.first]
    break unless ready

    ready.log(@verbose)
    @ordered.shift
  end
end
94
94
 
95
- # Perform the comparison
95
+ # Perform the comparison, populate @results and return the number of failing
96
+ # paths (paths with non-zero diff).
96
97
  def run
97
- # Map of path -> Result object, queue_read sets callbacks to populate this
98
+ # Map of path -> Result object, populated by process_results
98
99
  @results = {}
100
+ @ordered = @config.paths.dup
101
+
102
+ unless @cache.read_tags.empty?
103
+ SiteDiff.log("Using sites from cache: " +
104
+ @cache.read_tags.sort.join(', '))
105
+ end
99
106
 
100
- hydra = Typhoeus::Hydra.new(max_concurrency: 3)
101
- @config.paths.each { |path| queue_read(hydra, path) }
102
- hydra.run
107
+ fetcher = Fetch.new(@cache, @config.paths,
108
+ :before => before, :after => after)
109
+ fetcher.run(&self.method(:process_results))
103
110
 
104
111
  # Order by original path order
105
112
  @results = @config.paths.map { |p| @results[p] }
113
+ return results.map{ |r| r unless r.success? }.compact.length
106
114
  end
107
115
 
108
116
  # Dump results to disk
109
- def dump(dir, report_before, report_after, failing_paths)
117
+ def dump(dir, report_before, report_after)
110
118
  report_before ||= before
111
119
  report_after ||= after
112
- FileUtils.mkdir_p(dir)
120
+ dir = Pathname.new(dir)
121
+ dir.mkpath unless dir.directory?
113
122
 
114
123
  # store diffs of each failing case, first wipe out existing diffs
115
- diff_dir = File.join(dir, DIFFS_DIR)
116
- FileUtils.rm_rf(diff_dir)
124
+ diff_dir = dir + DIFFS_DIR
125
+ diff_dir.rmtree if diff_dir.exist?
117
126
  results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
118
- SiteDiff::log "All diff files were dumped inside #{dir}"
127
+ SiteDiff::log "All diff files were dumped inside #{dir.expand_path}"
119
128
 
120
129
  # store failing paths
121
- SiteDiff::log "Writing failures to #{failing_paths}"
122
- File.open(failing_paths, 'w') do |f|
130
+ failures = dir + FAILURES_FILE
131
+ SiteDiff::log "Writing failures to #{failures.expand_path}"
132
+ failures.open('w') do |f|
123
133
  results.each { |r| f.puts r.path unless r.success? }
124
134
  end
125
135
 
126
136
  # create report of results
127
- report = Diff::generate_html_report(results, report_before, report_after)
128
- File.open(File.join(dir, "/report.html") , 'w') { |f| f.write(report) }
137
+ report = Diff::generate_html_report(results, report_before, report_after,
138
+ @cache)
139
+ dir.+(REPORT_FILE).open('w') { |f| f.write(report) }
140
+
141
+ # serve some settings
142
+ settings = { 'before' => report_before, 'after' => report_after,
143
+ 'cached' => @cache.read_tags.map { |t| t.to_s } }
144
+ dir.+(SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) }
129
145
  end
130
146
  end
@@ -0,0 +1,61 @@
1
+ require 'set'
2
+
3
class SiteDiff
  # On-disk cache of fetched results, backed by a GDBM file (or DBM when the
  # gdbm library cannot be loaded).
  #
  # Entries are keyed by a (tag, path) pair. `read_tags` and `write_tags` are
  # the sets of tags that #get and #set are allowed to touch; requests for any
  # other tag are silently ignored.
  class Cache
    DEFAULT_FILENAME = 'cache.db'

    attr_accessor :read_tags, :write_tags

    # opts[:file]   - database filename (defaults to DEFAULT_FILENAME)
    # opts[:create] - when truthy, the database may be created if it is missing
    def initialize(opts = {})
      @file = opts[:file] || DEFAULT_FILENAME
      @create = opts[:create]
      @read_tags = Set.new
      @write_tags = Set.new
      @dbm = nil
    end

    # Close the database if it is open. The handle is dropped so that a later
    # call reopens the database instead of using a closed handle.
    def close
      return unless @dbm

      @dbm.close
      @dbm = nil
    end

    # Is a tag cached?
    #
    # Returns false (instead of raising NoMethodError on a nil handle) when
    # the database does not exist and may not be created.
    def tag?(tag)
      return false unless open

      !@dbm[tag.to_s].nil?
    end

    # Fetch the cached value for (tag, path).
    #
    # Returns nil when the tag is not in read_tags, the database is
    # unavailable, or nothing is stored under that key.
    def get(tag, path)
      return nil unless @read_tags.include?(tag)
      return nil unless open

      val = @dbm[key(tag, path)]
      # NOTE(review): Marshal.load is only safe on trusted data; this assumes
      # the cache file was written by #set and is not attacker-controlled.
      val && Marshal.load(val)
    end

    # Store result under (tag, path) and mark the tag as present.
    # Does nothing when the tag is not in write_tags or the database is
    # unavailable.
    def set(tag, path, result)
      return unless @write_tags.include?(tag)
      return unless open

      @dbm[tag.to_s] = 'TRUE'
      @dbm[key(tag, path)] = Marshal.dump(result)
    end

    private

    def key(tag, path)
      # Ensure encoding stays the same!
      Marshal.dump([tag, path.encode('UTF-8')])
    end

    # Ensure the DB is open.
    #
    # Returns true when a handle is available, false when the database file
    # is missing and creation was not requested.
    def open
      return true if @dbm

      # DBM adds an extra .db, ugh
      return false unless @create || File.exist?(@file) ||
                          File.exist?(@file + '.db')

      # Loaded lazily on purpose: gdbm is preferred, dbm is the fallback.
      begin
        require 'gdbm'
        @dbm = GDBM.new(@file)
      rescue LoadError
        require 'dbm'
        @dbm = DBM.new(@file)
      end
      true
    end
  end
end
@@ -1,12 +1,18 @@
1
1
  require 'thor'
2
- require 'sitediff/diff'
3
- require 'sitediff/sanitize'
4
- require 'sitediff/util/webserver'
5
- require 'open-uri'
6
- require 'uri'
2
+ require 'sitediff'
3
+ require 'sitediff/cache'
4
+ require 'sitediff/config'
5
+ require 'sitediff/config/creator'
6
+ require 'sitediff/fetch'
7
+ require 'sitediff/webserver/resultserver'
7
8
 
8
9
  class SiteDiff
9
10
  class Cli < Thor
11
+ class_option 'directory',
12
+ :type => :string,
13
+ :aliases => '-C',
14
+ :desc => "Go to a given directory before running."
15
+
10
16
  # Thor, by default, exits with 0 no matter what!
11
17
  def self.exit_on_failure?
12
18
  true
@@ -21,11 +27,15 @@ class SiteDiff
21
27
  :type => :string,
22
28
  :default => File.join('.', 'output'),
23
29
  :desc => "Location to write the output to."
24
- option 'paths',
30
+ option 'paths-file',
25
31
  :type => :string,
26
32
  :desc => 'Paths are read (one at a line) from PATHS: ' +
27
33
  'useful for iterating over sanitization rules',
28
34
  :aliases => '--paths-from-file'
35
+ option 'paths',
36
+ :type => :array,
37
+ :aliases => '-p',
38
+ :desc => "Fetch only these specific paths"
29
39
  option 'before',
30
40
  :type => :string,
31
41
  :desc => "URL used to fetch the before HTML. Acts as a prefix to specified paths",
@@ -42,16 +52,29 @@ class SiteDiff
42
52
  :type => :string,
43
53
  :desc => "After URL to use for reporting purposes. Useful if port forwarding.",
44
54
  :aliases => '--after-url-report'
45
- option 'cache',
55
+ option 'cached',
46
56
  :type => :string,
47
- :desc => "Filename to use for caching requests.",
48
- :lazy_default => 'cache.db'
57
+ :enum => %w[none all before after],
58
+ :default => 'before',
59
+ :desc => "Use the cached version of these sites, if available."
60
+ option 'quiet',
61
+ :type => :boolean,
62
+ :aliases => '-q',
63
+ :default => false,
64
+ :desc => "Show the difference between versions for each page"
49
65
  desc "diff [OPTIONS] [CONFIGFILES]", "Perform systematic diff on given URLs"
50
66
  def diff(*config_files)
51
- config = SiteDiff::Config.new(config_files)
67
+ config = chdir(config_files)
52
68
 
53
69
  # override config based on options
54
- if paths_file = options['paths']
70
+ paths = options['paths']
71
+ if paths_file = options['paths-file']
72
+ if paths then
73
+ SiteDiff::log "Can't have both --paths-file and --paths", :error
74
+ exit -1
75
+ end
76
+
77
+ paths_file = Pathname.new(paths_file).expand_path
55
78
  unless File.exists? paths_file
56
79
  raise Config::InvalidConfig,
57
80
  "Paths file '#{paths_file}' not found!"
@@ -59,32 +82,130 @@ class SiteDiff
59
82
  SiteDiff::log "Reading paths from: #{paths_file}"
60
83
  config.paths = File.readlines(paths_file)
61
84
  end
85
+ config.paths = paths if paths
86
+
62
87
  config.before['url'] = options['before'] if options['before']
63
88
  config.after['url'] = options['after'] if options['after']
64
89
 
65
- sitediff = SiteDiff.new(config, options['cache'])
66
- sitediff.run
90
+ # Setup cache
91
+ cache = SiteDiff::Cache.new(:create => options['cached'] != 'none')
92
+ cache.read_tags << :before if %w[before all].include?(options['cached'])
93
+ cache.read_tags << :after if %w[after all].include?(options['cached'])
94
+ cache.write_tags << :before << :after
95
+
96
+ sitediff = SiteDiff.new(config, cache, !options['quiet'])
97
+ num_failing = sitediff.run
98
+ exit_code = (num_failing > 0) ? 2 : 0;
67
99
 
68
- failing_paths = File.join(options['dump-dir'], 'failures.txt')
69
100
  sitediff.dump(options['dump-dir'], options['before-report'],
70
- options['after-report'], failing_paths)
101
+ options['after-report'])
71
102
  rescue Config::InvalidConfig => e
72
- SiteDiff.log "Invalid configuration: #{e.message}", :failure
103
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
104
+ rescue SiteDiffException => e
105
+ SiteDiff.log e.message, :error
106
+ else # no exception was raised
107
+ # Thor::Error --> exit(1), guaranteed by exit_on_failure?
108
+ # Failing diff --> exit(2), populated above
109
+ exit(exit_code)
73
110
  end
74
111
 
75
112
  option :port,
76
113
  :type => :numeric,
77
- :default => SiteDiff::Util::Webserver::DEFAULT_PORT,
114
+ :default => SiteDiff::Webserver::DEFAULT_PORT,
78
115
  :desc => 'The port to serve on'
79
- option :directory,
116
+ option 'dump-dir',
80
117
  :type => :string,
81
118
  :default => 'output',
82
- :desc => 'The directory to serve',
83
- :aliases => '--dump-dir'
119
+ :desc => 'The directory to serve'
120
+ option :browse,
121
+ :type => :boolean,
122
+ :default => true,
123
+ :desc => "Whether to open the served content in your browser"
84
124
  desc "serve [OPTIONS]", "Serve the sitediff output directory over HTTP"
85
- def serve
86
- SiteDiff::Util::Webserver.serve(options[:port], options[:directory],
87
- :announce => true).wait
125
# Serve the sitediff output directory over HTTP.
#
# config_files - optional configuration files; a missing configuration is
#                tolerated here (chdir is called with :config => false).
def serve(*config_files)
  config = chdir(config_files, :config => false)

  # Both sides may be read from the cache while serving.
  cache = Cache.new
  cache.read_tags << :before << :after

  server = SiteDiff::Webserver::ResultServer.new(
    options[:port],
    options['dump-dir'],
    :browse => options[:browse],
    :cache => cache,
    :config => config
  )
  server.wait
end
139
+
140
+ option :output,
141
+ :type => :string,
142
+ :default => 'sitediff',
143
+ :desc => 'Where to place the configuration',
144
+ :aliases => ['-o']
145
+ option :depth,
146
+ :type => :numeric,
147
+ :default => 3,
148
+ :desc => 'How deeply to crawl the given site'
149
+ option :rules,
150
+ :type => :string,
151
+ :enum => %w[yes no disabled],
152
+ :default => 'disabled',
153
+ :desc => 'Whether rules for the site should be auto-created'
154
+ desc "init URL [URL]", "Create a sitediff configuration"
155
# Create a sitediff configuration from one or two base URLs.
#
# Logs an error and exits with status 2 unless one or two URLs are given.
def init(*urls)
  if urls.empty? || urls.size > 2
    SiteDiff.log "sitediff init requires one or two URLs", :error
    exit 2
  end

  # Only honour --directory here; no existing configuration is searched for.
  chdir([], :search => false)

  creator = SiteDiff::Config::Creator.new(*urls)
  rules_mode = options[:rules]
  creator.create(
    :depth => options[:depth],
    :directory => options[:output],
    :rules => rules_mode != 'no',
    :rules_disabled => (rules_mode == 'disabled')
  ) { |_tag, info| SiteDiff.log "Visited #{info.uri}, cached" }

  # NOTE(review): the case statement in SiteDiff.log visible in this file has
  # no :success branch, so these labels are presumably printed without a
  # colour. Confirm whether :diff_success was intended.
  SiteDiff.log "Created #{creator.config_file.expand_path}", :success
  SiteDiff.log "You can now run 'sitediff diff'", :success
end
175
+
176
+ option :url,
177
+ :type => :string,
178
+ :desc => 'A custom base URL to fetch from'
179
+ desc "store [CONFIGFILES]",
180
+ "Cache the current contents of a site for later comparison"
181
# Cache the current contents of a site for later comparison.
#
# Each configured path is fetched from --url (or the configured 'after' URL
# when --url is absent) and written to the cache under the :before tag.
def store(*config_files)
  config = chdir(config_files)
  # A 'before' URL is not needed: the fetched site becomes the cached 'before'.
  config.validate(:need_before => false)

  cache = SiteDiff::Cache.new(:create => true)
  cache.write_tags << :before

  source_url = options[:url] || config.after['url']
  SiteDiff::Fetch.new(cache, config.paths, :before => source_url).run do |path, _res|
    SiteDiff.log "Visited #{path}, cached"
  end
end
194
+
195
+ private
196
# Change into the --directory option (if given) and load the configuration.
#
# files - configuration files passed on the command line
# opts  - :search (default true): when false, return right after changing
#         directory without loading any configuration
#         :config (default true): when false, a missing configuration is
#         tolerated and nil is returned instead of raising
#
# Returns the SiteDiff::Config, or nil when none was loaded.
def chdir(files, opts = {})
  settings = { :config => true, :search => true }.merge(opts)

  target = options['directory']
  Dir.chdir(target) if target

  return nil unless settings[:search]

  begin
    # :search is passed as true only when no explicit directory was given.
    SiteDiff::Config.new(files, :search => (target ? false : true))
  rescue SiteDiff::Config::ConfigNotFound
    # If no config required, allow it to pass
    raise if settings[:config]
    nil
  end
end
89
210
  end
90
211
  end