sitediff 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,6 @@ if $0 == __FILE__
5
5
  $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
6
6
  end
7
7
 
8
- require 'sitediff'
8
+ require 'sitediff/cli'
9
9
 
10
10
  SiteDiff::Cli.start
@@ -1,11 +1,10 @@
1
1
  #!/bin/env ruby
2
- require 'sitediff/cli.rb'
3
- require 'sitediff/config.rb'
4
- require 'sitediff/result.rb'
5
- require 'sitediff/uriwrapper'
6
- require 'sitediff/util/cache'
7
- require 'typhoeus'
2
+ require 'sitediff/config'
3
+ require 'sitediff/fetch'
4
+ require 'sitediff/result'
5
+ require 'pathname'
8
6
  require 'rainbow'
7
+ require 'yaml'
9
8
 
10
9
  class SiteDiff
11
10
  # path to misc. static files (e.g. erb, css files)
@@ -14,20 +13,28 @@ class SiteDiff
14
13
  # subdirectory containing all failing diffs
15
14
  DIFFS_DIR = 'diffs'
16
15
 
16
+ # files in output
17
+ FAILURES_FILE = 'failures.txt'
18
+ REPORT_FILE = 'report.html'
19
+ SETTINGS_FILE = 'settings.yaml'
20
+
17
21
  # label will be colorized and str will not be.
18
22
  # type dictates the color: can be :success, :error, or :failure
19
- def self.log(str, type=nil, label=nil)
23
+ def self.log(str, type=:info, label=nil)
20
24
  label = label ? "[sitediff] #{label}" : '[sitediff]'
21
25
  bg = fg = nil
22
26
  case type
23
- when :success
27
+ when :info
28
+ when :diff_success
24
29
  bg = :green
25
30
  fg = :black
26
- when :failure
31
+ when :diff_failure
27
32
  bg = :red
28
- when :error
33
+ when :warn
29
34
  bg = :yellow
30
35
  fg = :black
36
+ when :error
37
+ bg = :red
31
38
  end
32
39
  label = Rainbow(label)
33
40
  label = label.bg(bg) if bg
@@ -43,88 +50,97 @@ class SiteDiff
43
50
  @config.after['url']
44
51
  end
45
52
 
46
- def cache=(file)
47
- # FIXME: Non-global cache would be nice
48
- return unless file
49
- if Gem::Version.new(Typhoeus::VERSION) >= Gem::Version.new('0.6.4')
50
- Typhoeus::Config.cache = SiteDiff::Util::Cache.new(file)
51
- else
52
- # Bug, see: https://github.com/typhoeus/typhoeus/pull/296
53
- SiteDiff::log("Cache unsupported on Typhoeus version < 0.6.4", :failure)
53
+ def initialize(config, cache, verbose=true)
54
+ @cache = cache
55
+ @verbose = verbose
56
+
57
+ # Check for single-site mode
58
+ validate_opts = {}
59
+ if !config.before['url'] && @cache.tag?(:before)
60
+ raise SiteDiffException,
61
+ "A cached 'before' is required for single-site mode" \
62
+ unless @cache.read_tags.include?(:before)
63
+ validate_opts[:need_before] = false
54
64
  end
55
- end
65
+ config.validate(validate_opts)
56
66
 
57
- def initialize(config, cache)
58
- config.validate
59
67
  @config = config
60
- self.cache = cache
61
68
  end
62
69
 
63
- # Sanitize an HTML string based on configuration for either before or after
64
- def sanitize(html, pos)
65
- Sanitize::sanitize(html, @config.send(pos))
70
+ # Sanitize HTML
71
+ def sanitize(path, read_results)
72
+ [:before, :after].map do |tag|
73
+ html = read_results[tag].content
74
+ config = @config.send(tag)
75
+ Sanitizer.new(html, config, :path => path).sanitize
76
+ end
66
77
  end
67
78
 
68
- # Queues fetching before and after URLs with a Typhoeus::Hydra instance
69
- #
70
- # Upon completion of both before and after, prints and saves the diff to
71
- # @results.
72
- def queue_read(hydra, path)
73
- # ( :before | after ) => ReadResult object
74
- reads = {}
75
- [:before, :after].each do |pos|
76
- uri = UriWrapper.new(send(pos) + path)
77
-
78
- uri.queue(hydra) do |res|
79
- reads[pos] = res
80
- next unless reads.size == 2
81
-
82
- # we have read both before and after; calculate diff
83
- if error = reads[:before].error || reads[:after].error
84
- diff = Result.new(path, nil, nil, error)
85
- else
86
- diff = Result.new(path, sanitize(reads[:before].content, :before),
87
- sanitize(reads[:after].content,:after), nil)
88
- end
89
- diff.log
90
- @results[path] = diff
91
- end
79
+ # Process a set of read results
80
+ def process_results(path, read_results)
81
+ if error = read_results[:before].error || read_results[:after].error
82
+ diff = Result.new(path, nil, nil, error)
83
+ else
84
+ diff = Result.new(path, *sanitize(path, read_results), nil)
85
+ end
86
+ @results[path] = diff
87
+
88
+ # Print results in order!
89
+ while next_diff = @results[@ordered.first]
90
+ next_diff.log(@verbose)
91
+ @ordered.shift
92
92
  end
93
93
  end
94
94
 
95
- # Perform the comparison
95
+ # Perform the comparison, populate @results and return the number of failing
96
+ # paths (paths with non-zero diff).
96
97
  def run
97
- # Map of path -> Result object, queue_read sets callbacks to populate this
98
+ # Map of path -> Result object, populated by process_results
98
99
  @results = {}
100
+ @ordered = @config.paths.dup
101
+
102
+ unless @cache.read_tags.empty?
103
+ SiteDiff.log("Using sites from cache: " +
104
+ @cache.read_tags.sort.join(', '))
105
+ end
99
106
 
100
- hydra = Typhoeus::Hydra.new(max_concurrency: 3)
101
- @config.paths.each { |path| queue_read(hydra, path) }
102
- hydra.run
107
+ fetcher = Fetch.new(@cache, @config.paths,
108
+ :before => before, :after => after)
109
+ fetcher.run(&self.method(:process_results))
103
110
 
104
111
  # Order by original path order
105
112
  @results = @config.paths.map { |p| @results[p] }
113
+ return results.map{ |r| r unless r.success? }.compact.length
106
114
  end
107
115
 
108
116
  # Dump results to disk
109
- def dump(dir, report_before, report_after, failing_paths)
117
+ def dump(dir, report_before, report_after)
110
118
  report_before ||= before
111
119
  report_after ||= after
112
- FileUtils.mkdir_p(dir)
120
+ dir = Pathname.new(dir)
121
+ dir.mkpath unless dir.directory?
113
122
 
114
123
  # store diffs of each failing case, first wipe out existing diffs
115
- diff_dir = File.join(dir, DIFFS_DIR)
116
- FileUtils.rm_rf(diff_dir)
124
+ diff_dir = dir + DIFFS_DIR
125
+ diff_dir.rmtree if diff_dir.exist?
117
126
  results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
118
- SiteDiff::log "All diff files were dumped inside #{dir}"
127
+ SiteDiff::log "All diff files were dumped inside #{dir.expand_path}"
119
128
 
120
129
  # store failing paths
121
- SiteDiff::log "Writing failures to #{failing_paths}"
122
- File.open(failing_paths, 'w') do |f|
130
+ failures = dir + FAILURES_FILE
131
+ SiteDiff::log "Writing failures to #{failures.expand_path}"
132
+ failures.open('w') do |f|
123
133
  results.each { |r| f.puts r.path unless r.success? }
124
134
  end
125
135
 
126
136
  # create report of results
127
- report = Diff::generate_html_report(results, report_before, report_after)
128
- File.open(File.join(dir, "/report.html") , 'w') { |f| f.write(report) }
137
+ report = Diff::generate_html_report(results, report_before, report_after,
138
+ @cache)
139
+ dir.+(REPORT_FILE).open('w') { |f| f.write(report) }
140
+
141
+ # serve some settings
142
+ settings = { 'before' => report_before, 'after' => report_after,
143
+ 'cached' => @cache.read_tags.map { |t| t.to_s } }
144
+ dir.+(SETTINGS_FILE).open('w') { |f| YAML.dump(settings, f) }
129
145
  end
130
146
  end
@@ -0,0 +1,61 @@
1
+ require 'set'
2
+
3
+ class SiteDiff
4
+ class Cache
5
+ DEFAULT_FILENAME = 'cache.db'
6
+
7
+ attr_accessor :read_tags, :write_tags
8
+
9
+ def initialize(opts = {})
10
+ @file = opts[:file] || DEFAULT_FILENAME
11
+ @create = opts[:create]
12
+ @read_tags = Set.new
13
+ @write_tags = Set.new
14
+ end
15
+
16
+ def close; @dbm.close if defined? @dbm; end
17
+
18
+ # Is a tag cached?
19
+ def tag?(tag)
20
+ open
21
+ @dbm[tag.to_s]
22
+ end
23
+
24
+ def get(tag, path)
25
+ return nil unless @read_tags.include? tag
26
+ open or return nil
27
+ val = @dbm[key(tag, path)]
28
+ return val && Marshal.load(val)
29
+ end
30
+
31
+ def set(tag, path, result)
32
+ return unless @write_tags.include? tag
33
+ open or return
34
+ @dbm[tag.to_s] = 'TRUE'
35
+ @dbm[key(tag, path)] = Marshal.dump(result)
36
+ end
37
+
38
+ private
39
+ def key(tag, path)
40
+ # Ensure encoding stays the same!
41
+ Marshal.dump([tag, path.encode('UTF-8')])
42
+ end
43
+
44
+ # Ensure the DB is open
45
+ def open
46
+ # DBM adds an extra .db, ugh
47
+ return false unless @create || File.exist?(@file) ||
48
+ File.exist?(@file + '.db')
49
+ return true if defined? @dbm
50
+
51
+ begin
52
+ require 'gdbm'
53
+ @dbm = GDBM.new(@file)
54
+ rescue LoadError
55
+ require 'dbm'
56
+ @dbm = DBM.new(@file)
57
+ end
58
+ return true
59
+ end
60
+ end
61
+ end
@@ -1,12 +1,18 @@
1
1
  require 'thor'
2
- require 'sitediff/diff'
3
- require 'sitediff/sanitize'
4
- require 'sitediff/util/webserver'
5
- require 'open-uri'
6
- require 'uri'
2
+ require 'sitediff'
3
+ require 'sitediff/cache'
4
+ require 'sitediff/config'
5
+ require 'sitediff/config/creator'
6
+ require 'sitediff/fetch'
7
+ require 'sitediff/webserver/resultserver'
7
8
 
8
9
  class SiteDiff
9
10
  class Cli < Thor
11
+ class_option 'directory',
12
+ :type => :string,
13
+ :aliases => '-C',
14
+ :desc => "Go to a given directory before running."
15
+
10
16
  # Thor, by default, exits with 0 no matter what!
11
17
  def self.exit_on_failure?
12
18
  true
@@ -21,11 +27,15 @@ class SiteDiff
21
27
  :type => :string,
22
28
  :default => File.join('.', 'output'),
23
29
  :desc => "Location to write the output to."
24
- option 'paths',
30
+ option 'paths-file',
25
31
  :type => :string,
26
32
  :desc => 'Paths are read (one at a line) from PATHS: ' +
27
33
  'useful for iterating over sanitization rules',
28
34
  :aliases => '--paths-from-file'
35
+ option 'paths',
36
+ :type => :array,
37
+ :aliases => '-p',
38
+ :desc => "Fetch only these specific paths"
29
39
  option 'before',
30
40
  :type => :string,
31
41
  :desc => "URL used to fetch the before HTML. Acts as a prefix to specified paths",
@@ -42,16 +52,29 @@ class SiteDiff
42
52
  :type => :string,
43
53
  :desc => "After URL to use for reporting purposes. Useful if port forwarding.",
44
54
  :aliases => '--after-url-report'
45
- option 'cache',
55
+ option 'cached',
46
56
  :type => :string,
47
- :desc => "Filename to use for caching requests.",
48
- :lazy_default => 'cache.db'
57
+ :enum => %w[none all before after],
58
+ :default => 'before',
59
+ :desc => "Use the cached version of these sites, if available."
60
+ option 'quiet',
61
+ :type => :boolean,
62
+ :aliases => '-q',
63
+ :default => false,
64
+ :desc => "Show the difference between versions for each page"
49
65
  desc "diff [OPTIONS] [CONFIGFILES]", "Perform systematic diff on given URLs"
50
66
  def diff(*config_files)
51
- config = SiteDiff::Config.new(config_files)
67
+ config = chdir(config_files)
52
68
 
53
69
  # override config based on options
54
- if paths_file = options['paths']
70
+ paths = options['paths']
71
+ if paths_file = options['paths-file']
72
+ if paths then
73
+ SiteDiff::log "Can't have both --paths-file and --paths", :error
74
+ exit -1
75
+ end
76
+
77
+ paths_file = Pathname.new(paths_file).expand_path
55
78
  unless File.exists? paths_file
56
79
  raise Config::InvalidConfig,
57
80
  "Paths file '#{paths_file}' not found!"
@@ -59,32 +82,130 @@ class SiteDiff
59
82
  SiteDiff::log "Reading paths from: #{paths_file}"
60
83
  config.paths = File.readlines(paths_file)
61
84
  end
85
+ config.paths = paths if paths
86
+
62
87
  config.before['url'] = options['before'] if options['before']
63
88
  config.after['url'] = options['after'] if options['after']
64
89
 
65
- sitediff = SiteDiff.new(config, options['cache'])
66
- sitediff.run
90
+ # Setup cache
91
+ cache = SiteDiff::Cache.new(:create => options['cached'] != 'none')
92
+ cache.read_tags << :before if %w[before all].include?(options['cached'])
93
+ cache.read_tags << :after if %w[after all].include?(options['cached'])
94
+ cache.write_tags << :before << :after
95
+
96
+ sitediff = SiteDiff.new(config, cache, !options['quiet'])
97
+ num_failing = sitediff.run
98
+ exit_code = (num_failing > 0) ? 2 : 0;
67
99
 
68
- failing_paths = File.join(options['dump-dir'], 'failures.txt')
69
100
  sitediff.dump(options['dump-dir'], options['before-report'],
70
- options['after-report'], failing_paths)
101
+ options['after-report'])
71
102
  rescue Config::InvalidConfig => e
72
- SiteDiff.log "Invalid configuration: #{e.message}", :failure
103
+ SiteDiff.log "Invalid configuration: #{e.message}", :error
104
+ rescue SiteDiffException => e
105
+ SiteDiff.log e.message, :error
106
+ else # no exception was raised
107
+ # Thor::Error --> exit(1), guaranteed by exit_on_failure?
108
+ # Failing diff --> exit(2), populated above
109
+ exit(exit_code)
73
110
  end
74
111
 
75
112
  option :port,
76
113
  :type => :numeric,
77
- :default => SiteDiff::Util::Webserver::DEFAULT_PORT,
114
+ :default => SiteDiff::Webserver::DEFAULT_PORT,
78
115
  :desc => 'The port to serve on'
79
- option :directory,
116
+ option 'dump-dir',
80
117
  :type => :string,
81
118
  :default => 'output',
82
- :desc => 'The directory to serve',
83
- :aliases => '--dump-dir'
119
+ :desc => 'The directory to serve'
120
+ option :browse,
121
+ :type => :boolean,
122
+ :default => true,
123
+ :desc => "Whether to open the served content in your browser"
84
124
  desc "serve [OPTIONS]", "Serve the sitediff output directory over HTTP"
85
- def serve
86
- SiteDiff::Util::Webserver.serve(options[:port], options[:directory],
87
- :announce => true).wait
125
+ def serve(*config_files)
126
+ config = chdir(config_files, :config => false)
127
+
128
+ cache = Cache.new
129
+ cache.read_tags << :before << :after
130
+
131
+ SiteDiff::Webserver::ResultServer.new(
132
+ options[:port],
133
+ options['dump-dir'],
134
+ :browse => options[:browse],
135
+ :cache => cache,
136
+ :config => config,
137
+ ).wait
138
+ end
139
+
140
+ option :output,
141
+ :type => :string,
142
+ :default => 'sitediff',
143
+ :desc => 'Where to place the configuration',
144
+ :aliases => ['-o']
145
+ option :depth,
146
+ :type => :numeric,
147
+ :default => 3,
148
+ :desc => 'How deeply to crawl the given site'
149
+ option :rules,
150
+ :type => :string,
151
+ :enum => %w[yes no disabled],
152
+ :default => 'disabled',
153
+ :desc => 'Whether rules for the site should be auto-created'
154
+ desc "init URL [URL]", "Create a sitediff configuration"
155
+ def init(*urls)
156
+ unless (1..2).include? urls.size
157
+ SiteDiff.log "sitediff init requires one or two URLs", :error
158
+ exit 2
159
+ end
160
+
161
+ chdir([], :search => false)
162
+ creator = SiteDiff::Config::Creator.new(*urls)
163
+ creator.create(
164
+ :depth => options[:depth],
165
+ :directory => options[:output],
166
+ :rules => options[:rules] != 'no',
167
+ :rules_disabled => (options[:rules] == 'disabled'),
168
+ ) do |tag, info|
169
+ SiteDiff.log "Visited #{info.uri}, cached"
170
+ end
171
+
172
+ SiteDiff.log "Created #{creator.config_file.expand_path}", :success
173
+ SiteDiff.log "You can now run 'sitediff diff'", :success
174
+ end
175
+
176
+ option :url,
177
+ :type => :string,
178
+ :desc => 'A custom base URL to fetch from'
179
+ desc "store [CONFIGFILES]",
180
+ "Cache the current contents of a site for later comparison"
181
+ def store(*config_files)
182
+ config = chdir(config_files)
183
+ config.validate(:need_before => false)
184
+
185
+ cache = SiteDiff::Cache.new(:create => true)
186
+ cache.write_tags << :before
187
+
188
+ base = options[:url] || config.after['url']
189
+ fetcher = SiteDiff::Fetch.new(cache, config.paths, :before => base)
190
+ fetcher.run do |path, res|
191
+ SiteDiff.log "Visited #{path}, cached"
192
+ end
193
+ end
194
+
195
+ private
196
+ def chdir(files, opts = {})
197
+ opts = { :config => true, :search => true }.merge(opts)
198
+
199
+ dir = options['directory']
200
+ Dir.chdir(dir) if dir
201
+
202
+ return unless opts[:search]
203
+ begin
204
+ SiteDiff::Config.new(files, :search => !dir)
205
+ rescue SiteDiff::Config::ConfigNotFound => e
206
+ raise if opts[:config]
207
+ # If no config required, allow it to pass
208
+ end
88
209
  end
89
210
  end
90
211
  end