sitediff 0.0.6 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/bin/sitediff +9 -2
- data/lib/sitediff.rb +126 -81
- data/lib/sitediff/cache.rb +35 -6
- data/lib/sitediff/cli.rb +254 -119
- data/lib/sitediff/config.rb +362 -29
- data/lib/sitediff/config/creator.rb +53 -71
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/crawler.rb +11 -15
- data/lib/sitediff/diff.rb +28 -9
- data/lib/sitediff/fetch.rb +9 -2
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +144 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +226 -30
- data/lib/sitediff/files/sitediff.js +176 -0
- data/lib/sitediff/report.rb +238 -0
- data/lib/sitediff/result.rb +47 -19
- data/lib/sitediff/sanitize.rb +29 -8
- data/lib/sitediff/sanitize/dom_transform.rb +45 -6
- data/lib/sitediff/sanitize/regexp.rb +23 -2
- data/lib/sitediff/uriwrapper.rb +56 -15
- data/lib/sitediff/webserver.rb +12 -3
- data/lib/sitediff/webserver/resultserver.rb +28 -33
- metadata +33 -16
- data/lib/sitediff/files/html_report.html.erb +0 -66
- data/lib/sitediff/files/rules/drupal.yaml +0 -63
- data/lib/sitediff/rules.rb +0 -65
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1b7854497b5e81f48d810acec8106cbc66e33492d046e032e5516f76db26f142
|
4
|
+
data.tar.gz: a9349a79953237dd017600d49d38b8e734afc561f0ce09a1f8732e0e933530c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0e91f665f3c59b8a65f16c6942ec49b9cc9ee7fd12b0a777eadb844a0b9819ab1fd9485495bf2c757ca7342a6198dcccb5ae546c4ddf2682f234d015b64309b2
|
7
|
+
data.tar.gz: 64b7980bbbade8710b6069af19a67083678c2bd5fa99674df3360c1c6a3ddf8a15de7c5be4e8349ec298fc1c0dc27535b816089cd4f8852b8c8633861d72a178
|
data/bin/sitediff
CHANGED
@@ -2,8 +2,15 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
# when run as gem, $0 is /usr/local/bin/sitediff not this file
|
5
|
-
|
5
|
+
if $PROGRAM_NAME == __FILE__
|
6
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __dir__)
|
7
|
+
end
|
6
8
|
|
7
9
|
require 'sitediff/cli'
|
8
10
|
|
9
|
-
|
11
|
+
begin
|
12
|
+
SiteDiff::Cli.start
|
13
|
+
rescue Interrupt
|
14
|
+
puts("\n")
|
15
|
+
SiteDiff.log('Stopping. Interrupted by user.')
|
16
|
+
end
|
data/lib/sitediff.rb
CHANGED
@@ -2,63 +2,85 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
require 'sitediff/config'
|
5
|
+
require 'sitediff/diff'
|
5
6
|
require 'sitediff/fetch'
|
6
7
|
require 'sitediff/result'
|
8
|
+
require 'sitediff/report'
|
7
9
|
require 'pathname'
|
8
10
|
require 'rainbow'
|
11
|
+
require 'rubygems'
|
9
12
|
require 'yaml'
|
10
13
|
|
14
|
+
# SiteDiff Object.
|
11
15
|
class SiteDiff
|
12
|
-
|
16
|
+
attr_reader :config, :results
|
17
|
+
|
18
|
+
# SiteDiff installation directory.
|
19
|
+
ROOT_DIR = File.dirname(File.dirname(__FILE__))
|
20
|
+
|
21
|
+
# Path to misc files. Ex: *.erb, *.css.
|
13
22
|
FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')
|
14
23
|
|
15
|
-
#
|
16
|
-
|
17
|
-
|
18
|
-
#
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
bg = fg = nil
|
28
|
-
case type
|
29
|
-
when :info
|
30
|
-
bg = fg = nil
|
31
|
-
when :diff_success
|
32
|
-
bg = :green
|
24
|
+
# Logs a message.
|
25
|
+
#
|
26
|
+
# Label will be colorized and message will not.
|
27
|
+
# Type dictates the color: can be :success, :error, or :failure.
|
28
|
+
#
|
29
|
+
# TODO: Only print :debug messages in debug mode.
|
30
|
+
def self.log(message, type = :info, label = nil)
|
31
|
+
# Prepare label.
|
32
|
+
label ||= type unless type == :info
|
33
|
+
label = label.to_s
|
34
|
+
unless label.empty?
|
35
|
+
# Colorize label.
|
33
36
|
fg = :black
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
37
|
+
bg = :blue
|
38
|
+
|
39
|
+
case type
|
40
|
+
when :info
|
41
|
+
bg = :cyan
|
42
|
+
when :success
|
43
|
+
bg = :green
|
44
|
+
when :error
|
45
|
+
bg = :red
|
46
|
+
when :warning
|
47
|
+
bg = :yellow
|
48
|
+
end
|
49
|
+
|
50
|
+
label = '[' + label.to_s + ']'
|
51
|
+
label = Rainbow(label)
|
52
|
+
label = label.bg(bg) if bg
|
53
|
+
label = label.fg(fg) if fg
|
54
|
+
|
55
|
+
# Add a space after the label.
|
56
|
+
label += ' '
|
41
57
|
end
|
42
|
-
|
43
|
-
|
44
|
-
label = label.fg(fg) if fg
|
45
|
-
puts label + ' ' + str
|
58
|
+
|
59
|
+
puts label + message
|
46
60
|
end
|
47
61
|
|
48
|
-
|
62
|
+
##
|
63
|
+
# Returns the "before" site's URL.
|
64
|
+
#
|
65
|
+
# TODO: Remove in favor of config.before_url.
|
49
66
|
def before
|
50
67
|
@config.before['url']
|
51
68
|
end
|
52
69
|
|
70
|
+
##
|
71
|
+
# Returns the "after" site's URL.
|
72
|
+
#
|
73
|
+
# TODO: Remove in favor of config.after_url.
|
53
74
|
def after
|
54
75
|
@config.after['url']
|
55
76
|
end
|
56
77
|
|
57
|
-
|
78
|
+
# Initialize SiteDiff.
|
79
|
+
def initialize(config, cache, verbose = true, debug = false)
|
58
80
|
@cache = cache
|
59
81
|
@verbose = verbose
|
60
82
|
@debug = debug
|
61
|
-
|
83
|
+
|
62
84
|
# Check for single-site mode
|
63
85
|
validate_opts = {}
|
64
86
|
if !config.before['url'] && @cache.tag?(:before)
|
@@ -69,37 +91,48 @@ class SiteDiff
|
|
69
91
|
validate_opts[:need_before] = false
|
70
92
|
end
|
71
93
|
config.validate(validate_opts)
|
72
|
-
|
73
|
-
|
94
|
+
# Configure diff.
|
95
|
+
Diff.diff_config(config)
|
74
96
|
@config = config
|
75
97
|
end
|
76
98
|
|
77
|
-
# Sanitize HTML
|
99
|
+
# Sanitize HTML.
|
78
100
|
def sanitize(path, read_results)
|
79
101
|
%i[before after].map do |tag|
|
80
102
|
html = read_results[tag].content
|
103
|
+
# TODO: See why encoding is empty while running tests.
|
104
|
+
#
|
105
|
+
# The presence of an "encoding" value used to be used to determine
|
106
|
+
# if the sanitizer would be called. However, encoding turns up blank
|
107
|
+
# during rspec tests for some reason.
|
81
108
|
encoding = read_results[tag].encoding
|
82
|
-
if encoding
|
83
|
-
|
84
|
-
Sanitizer.new(html,
|
109
|
+
if encoding || html.length.positive?
|
110
|
+
section = @config.send(tag, true)
|
111
|
+
Sanitizer.new(html, section, path: path).sanitize
|
85
112
|
else
|
86
113
|
html
|
87
114
|
end
|
88
115
|
end
|
89
116
|
end
|
90
117
|
|
91
|
-
|
118
|
+
##
|
119
|
+
# Process a set of read results.
|
120
|
+
#
|
121
|
+
# This is the callback that processes items fetched by the Fetcher.
|
92
122
|
def process_results(path, read_results)
|
93
|
-
|
123
|
+
error = (read_results[:before].error || read_results[:after].error)
|
124
|
+
if error
|
94
125
|
diff = Result.new(path, nil, nil, nil, nil, error)
|
95
126
|
else
|
96
127
|
begin
|
97
|
-
diff = Result.new(
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
128
|
+
diff = Result.new(
|
129
|
+
path,
|
130
|
+
*sanitize(path, read_results),
|
131
|
+
read_results[:before].encoding,
|
132
|
+
read_results[:after].encoding,
|
133
|
+
nil
|
134
|
+
)
|
135
|
+
rescue StandardError => e
|
103
136
|
raise if @debug
|
104
137
|
|
105
138
|
Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
|
@@ -114,60 +147,72 @@ class SiteDiff
|
|
114
147
|
end
|
115
148
|
end
|
116
149
|
|
117
|
-
|
118
|
-
#
|
119
|
-
|
150
|
+
##
|
151
|
+
# Compute diff as per config.
|
152
|
+
#
|
153
|
+
# @return [Integer]
|
154
|
+
# Number of paths which have diffs.
|
155
|
+
def run
|
120
156
|
# Map of path -> Result object, populated by process_results
|
121
157
|
@results = {}
|
122
158
|
@ordered = @config.paths.dup
|
123
159
|
|
124
160
|
unless @cache.read_tags.empty?
|
125
|
-
SiteDiff.log('Using sites from cache: ' +
|
126
|
-
@cache.read_tags.sort.join(', '))
|
161
|
+
SiteDiff.log('Using sites from cache: ' + @cache.read_tags.sort.join(', '))
|
127
162
|
end
|
128
163
|
|
129
164
|
# TODO: Fix this after config merge refactor!
|
130
165
|
# Not quite right. We are not passing @config.before or @config.after
|
131
166
|
# so passing this instead but @config.after['curl_opts'] is ignored.
|
167
|
+
curl_opts = @config.setting :curl_opts
|
132
168
|
config_curl_opts = @config.before['curl_opts']
|
133
169
|
curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
|
134
|
-
fetcher = Fetch.new(
|
135
|
-
|
170
|
+
fetcher = Fetch.new(
|
171
|
+
@cache,
|
172
|
+
@config.paths,
|
173
|
+
@config.setting(:interval),
|
174
|
+
@config.setting(:concurrency),
|
175
|
+
curl_opts,
|
176
|
+
@debug,
|
177
|
+
before: @config.before_url,
|
178
|
+
after: @config.after_url
|
179
|
+
)
|
180
|
+
|
181
|
+
# Run the Fetcher with "process results" as a callback.
|
136
182
|
fetcher.run(&method(:process_results))
|
137
183
|
|
138
184
|
# Order by original path order
|
139
|
-
@results = @config.paths.map { |
|
185
|
+
@results = @config.paths.map { |path| @results[path] }
|
140
186
|
results.map { |r| r unless r.success? }.compact.length
|
141
187
|
end
|
142
188
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
# store diffs of each failing case, first wipe out existing diffs
|
151
|
-
diff_dir = dir + DIFFS_DIR
|
152
|
-
diff_dir.rmtree if diff_dir.exist?
|
153
|
-
results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
|
154
|
-
SiteDiff.log "All diff files were dumped inside #{dir.expand_path}"
|
155
|
-
|
156
|
-
# store failing paths
|
157
|
-
failures = dir + FAILURES_FILE
|
158
|
-
SiteDiff.log "Writing failures to #{failures.expand_path}"
|
159
|
-
failures.open('w') do |f|
|
160
|
-
results.each { |r| f.puts r.path unless r.success? }
|
189
|
+
##
|
190
|
+
# Get a reporter object to help with report generation.
|
191
|
+
def report
|
192
|
+
if @results.nil?
|
193
|
+
raise SiteDiffException(
|
194
|
+
'No results detected. Run SiteDiff.run before SiteDiff.report.'
|
195
|
+
)
|
161
196
|
end
|
162
197
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
198
|
+
Report.new(@config, @cache, @results)
|
199
|
+
end
|
200
|
+
|
201
|
+
##
|
202
|
+
# Get SiteDiff gemspec.
|
203
|
+
def self.gemspec
|
204
|
+
file = ROOT_DIR + '/sitediff.gemspec'
|
205
|
+
Gem::Specification.load(file)
|
206
|
+
end
|
167
207
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
208
|
+
##
|
209
|
+
# Ensures that a directory exists and returns a Pathname for it.
|
210
|
+
#
|
211
|
+
# @param [String] dir
|
212
|
+
# path/to/directory
|
213
|
+
def self.ensure_dir(dir)
|
214
|
+
dir = Pathname.new(dir) unless dir.is_a? Pathname
|
215
|
+
dir.mkpath unless dir.directory?
|
216
|
+
dir
|
172
217
|
end
|
173
218
|
end
|
data/lib/sitediff/cache.rb
CHANGED
@@ -4,28 +4,42 @@ require 'set'
|
|
4
4
|
require 'fileutils'
|
5
5
|
|
6
6
|
class SiteDiff
|
7
|
+
# SiteDiff Cache Handler.
|
7
8
|
class Cache
|
8
9
|
attr_accessor :read_tags, :write_tags
|
9
10
|
|
11
|
+
##
|
12
|
+
# Creates a Cache object.
|
10
13
|
def initialize(opts = {})
|
11
14
|
@create = opts[:create]
|
12
15
|
|
13
|
-
# Read and Write tags are sets that can contain :before and :after
|
14
|
-
# They indicate whether we should use the cache for reading or writing
|
16
|
+
# Read and Write tags are sets that can contain :before and :after.
|
17
|
+
# They indicate whether we should use the cache for reading or writing.
|
15
18
|
@read_tags = Set.new
|
16
19
|
@write_tags = Set.new
|
20
|
+
|
21
|
+
# The directory used by the cache for storage.
|
17
22
|
@dir = opts[:directory] || '.'
|
18
23
|
end
|
19
24
|
|
25
|
+
##
|
20
26
|
# Is a tag cached?
|
27
|
+
# TODO: Rename it to is_cached? as it makes more sense.
|
21
28
|
def tag?(tag)
|
22
29
|
File.directory?(File.join(@dir, 'snapshot', tag.to_s))
|
23
30
|
end
|
24
31
|
|
32
|
+
##
|
33
|
+
# Get data from cache.
|
25
34
|
def get(tag, path)
|
26
35
|
return nil unless @read_tags.include? tag
|
27
36
|
|
28
|
-
filename = File.join(
|
37
|
+
filename = File.join(
|
38
|
+
@dir,
|
39
|
+
'snapshot',
|
40
|
+
tag.to_s,
|
41
|
+
*path.split(File::SEPARATOR)
|
42
|
+
)
|
29
43
|
|
30
44
|
filename = File.join(filename, 'index.html') if File.directory?(filename)
|
31
45
|
return nil unless File.file? filename
|
@@ -33,10 +47,17 @@ class SiteDiff
|
|
33
47
|
Marshal.load(File.read(filename))
|
34
48
|
end
|
35
49
|
|
50
|
+
##
|
51
|
+
# Set data to cache.
|
36
52
|
def set(tag, path, result)
|
37
53
|
return unless @write_tags.include? tag
|
38
54
|
|
39
|
-
filename = File.join(
|
55
|
+
filename = File.join(
|
56
|
+
@dir,
|
57
|
+
'snapshot',
|
58
|
+
tag.to_s,
|
59
|
+
*path.split(File::SEPARATOR)
|
60
|
+
)
|
40
61
|
|
41
62
|
filename = File.join(filename, 'index.html') if File.directory?(filename)
|
42
63
|
filepath = Pathname.new(filename)
|
@@ -50,23 +71,31 @@ class SiteDiff
|
|
50
71
|
# May cause problems if action is not atomic!
|
51
72
|
# Move existing file to dir/index.html first
|
52
73
|
# Not robust! Should generate an UUID or something.
|
53
|
-
|
74
|
+
if File.exist?(tempname)
|
75
|
+
SiteDiff.log "Overwriting file #{tempname}", :warning
|
76
|
+
end
|
54
77
|
curdir.rename(tempname)
|
55
78
|
filepath.dirname.mkpath
|
56
79
|
# Should only happen in strange situations such as when the path
|
57
80
|
# is foo/index.html/bar (i.e., index.html is a directory)
|
58
|
-
|
81
|
+
if (curdir + 'index.html').exist?
|
82
|
+
SiteDiff.log "Overwriting file #{tempname}", :warning
|
83
|
+
end
|
59
84
|
tempname.rename(curdir + 'index.html')
|
60
85
|
end
|
61
86
|
end
|
62
87
|
File.open(filename, 'w') { |file| file.write(Marshal.dump(result)) }
|
63
88
|
end
|
64
89
|
|
90
|
+
##
|
91
|
+
# TODO: Document this or remove it if unused.
|
65
92
|
def key(tag, path)
|
66
93
|
# Ensure encoding stays the same!
|
67
94
|
Marshal.dump([tag, path.encode('UTF-8')])
|
68
95
|
end
|
69
96
|
|
97
|
+
##
|
98
|
+
# Ensures that a directory exists.
|
70
99
|
def get_dir(directory)
|
71
100
|
# Create the dir. Must go before cache initialization!
|
72
101
|
@dir = Pathname.new(directory || '.')
|
data/lib/sitediff/cli.rb
CHANGED
@@ -5,32 +5,32 @@ require 'sitediff'
|
|
5
5
|
require 'sitediff/cache'
|
6
6
|
require 'sitediff/config'
|
7
7
|
require 'sitediff/config/creator'
|
8
|
+
require 'sitediff/config/preset'
|
8
9
|
require 'sitediff/fetch'
|
9
10
|
require 'sitediff/webserver/resultserver'
|
10
11
|
|
11
12
|
class SiteDiff
|
13
|
+
# SiteDiff CLI.
|
14
|
+
# TODO: Use config.defaults to feed default values for sitediff.yaml params?
|
12
15
|
class Cli < Thor
|
13
16
|
class_option 'directory',
|
14
17
|
type: :string,
|
15
18
|
aliases: '-C',
|
16
19
|
default: 'sitediff',
|
17
20
|
desc: 'Configuration directory'
|
18
|
-
class_option :
|
19
|
-
type: :hash,
|
20
|
-
default: {},
|
21
|
-
desc: 'Options to be passed to curl'
|
22
|
-
class_option :insecure,
|
21
|
+
class_option :debug,
|
23
22
|
type: :boolean,
|
23
|
+
aliases: '-d',
|
24
24
|
default: false,
|
25
|
-
desc: '
|
26
|
-
class_option
|
25
|
+
desc: 'Stop on certain errors and produce error trace backs.'
|
26
|
+
class_option 'verbose',
|
27
27
|
type: :boolean,
|
28
|
+
aliases: '-v',
|
28
29
|
default: false,
|
29
|
-
desc: '
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
desc: 'Crawling delay - interval in milliseconds'
|
30
|
+
desc: 'Show verbose output in terminal'
|
31
|
+
|
32
|
+
# Command aliases.
|
33
|
+
map recrawl: :crawl
|
34
34
|
|
35
35
|
# Thor, by default, exits with 0 no matter what!
|
36
36
|
def self.exit_on_failure?
|
@@ -42,6 +42,20 @@ class SiteDiff
|
|
42
42
|
true
|
43
43
|
end
|
44
44
|
|
45
|
+
desc 'version', 'Show version information'
|
46
|
+
##
|
47
|
+
# Show version information.
|
48
|
+
def version
|
49
|
+
gemspec = SiteDiff.gemspec
|
50
|
+
output = []
|
51
|
+
output.push("Sitediff CLI #{gemspec.version}")
|
52
|
+
if options[:verbose]
|
53
|
+
output.push('Website: ' + gemspec.homepage)
|
54
|
+
output.push('GitHub: ' + gemspec.metadata['source_code_uri'])
|
55
|
+
end
|
56
|
+
puts output.join("\n")
|
57
|
+
end
|
58
|
+
|
45
59
|
option 'paths-file',
|
46
60
|
type: :string,
|
47
61
|
desc: 'Paths are read (one at a line) from PATHS: ' \
|
@@ -53,79 +67,118 @@ class SiteDiff
|
|
53
67
|
desc: 'Specific path or paths to fetch'
|
54
68
|
option 'before',
|
55
69
|
type: :string,
|
56
|
-
desc: 'URL
|
70
|
+
desc: 'URL to the "before" site, prefixed to all paths.',
|
57
71
|
aliases: '--before-url'
|
58
72
|
option 'after',
|
59
73
|
type: :string,
|
60
|
-
desc: 'URL
|
74
|
+
desc: 'URL to the "after" site, prefixed to all paths.',
|
61
75
|
aliases: '--after-url'
|
76
|
+
option 'report-format',
|
77
|
+
type: :string,
|
78
|
+
enum: %w[html json],
|
79
|
+
default: 'html',
|
80
|
+
desc: 'The format in which a report should be generated.'
|
81
|
+
# TODO: Deprecate the parameters before-report / after-report?
|
62
82
|
option 'before-report',
|
63
83
|
type: :string,
|
64
|
-
desc: '
|
84
|
+
desc: 'URL to use in reports. Useful if port forwarding.',
|
65
85
|
aliases: '--before-url-report'
|
66
86
|
option 'after-report',
|
67
87
|
type: :string,
|
68
|
-
desc: '
|
88
|
+
desc: 'URL to use in reports. Useful if port forwarding.',
|
69
89
|
aliases: '--after-url-report'
|
70
90
|
option 'cached',
|
71
91
|
type: :string,
|
72
92
|
enum: %w[none all before after],
|
73
93
|
default: 'before',
|
74
94
|
desc: 'Use the cached version of these sites, if available.'
|
75
|
-
option '
|
95
|
+
option 'ignore-whitespace',
|
76
96
|
type: :boolean,
|
77
|
-
aliases: '-v',
|
78
97
|
default: false,
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
98
|
+
aliases: '-w',
|
99
|
+
desc: 'Ignore changes in whitespace.'
|
100
|
+
option 'export',
|
101
|
+
type: :boolean,
|
102
|
+
default: false,
|
103
|
+
aliases: '-e',
|
104
|
+
desc: 'Export report to files. This option forces HTML format.'
|
105
|
+
desc 'diff [OPTIONS] [CONFIG-FILE]',
|
106
|
+
'Compute diffs on configured URLs.'
|
107
|
+
##
|
108
|
+
# Computes diffs.
|
109
|
+
def diff(config_file = nil)
|
88
110
|
@dir = get_dir(options['directory'])
|
89
|
-
config = SiteDiff::Config.new(
|
90
|
-
|
91
|
-
# override config based on options
|
92
|
-
paths = options['paths']
|
93
|
-
if (paths_file = options['paths-file'])
|
94
|
-
if paths
|
95
|
-
SiteDiff.log "Can't have both --paths-file and --paths", :error
|
96
|
-
exit(-1)
|
97
|
-
end
|
111
|
+
config = SiteDiff::Config.new(config_file, @dir)
|
98
112
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
end
|
104
|
-
SiteDiff.log "Reading paths from: #{paths_file}"
|
105
|
-
config.paths = File.readlines(paths_file)
|
113
|
+
# Determine "paths" override based on options.
|
114
|
+
if options['paths'] && options['paths-file']
|
115
|
+
SiteDiff.log "Can't specify both --paths-file and --paths.", :error
|
116
|
+
exit(-1)
|
106
117
|
end
|
107
|
-
config.paths = paths if paths
|
108
118
|
|
119
|
+
# Ignore whitespace option.
|
120
|
+
config.ignore_whitespace = options['ignore-whitespace'] if options['ignore-whitespace']
|
121
|
+
|
122
|
+
# Export report option.
|
123
|
+
config.export = options['export']
|
124
|
+
|
125
|
+
# Apply "paths" override, if any.
|
126
|
+
config.paths = options['paths'] if options['paths']
|
127
|
+
|
128
|
+
# Determine and apply "paths-file", if "paths" is not specified.
|
129
|
+
unless options['paths']
|
130
|
+
paths_file = options['paths-file']
|
131
|
+
paths_file ||= File.join(@dir, Config::DEFAULT_PATHS_FILENAME)
|
132
|
+
paths_file = File.expand_path(paths_file)
|
133
|
+
|
134
|
+
paths_count = config.paths_file_read(paths_file)
|
135
|
+
SiteDiff.log "Read #{paths_count} paths from: #{paths_file}"
|
136
|
+
end
|
137
|
+
|
138
|
+
# TODO: Why do we allow before and after override during diff?
|
109
139
|
config.before['url'] = options['before'] if options['before']
|
110
140
|
config.after['url'] = options['after'] if options['after']
|
111
141
|
|
112
|
-
#
|
113
|
-
cache = SiteDiff::Cache.new(
|
114
|
-
|
142
|
+
# Prepare cache.
|
143
|
+
cache = SiteDiff::Cache.new(
|
144
|
+
create: options['cached'] != 'none',
|
145
|
+
directory: @dir
|
146
|
+
)
|
115
147
|
cache.read_tags << :before if %w[before all].include?(options['cached'])
|
116
148
|
cache.read_tags << :after if %w[after all].include?(options['cached'])
|
117
149
|
cache.write_tags << :before << :after
|
118
150
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
151
|
+
# Run sitediff.
|
152
|
+
sitediff = SiteDiff.new(
|
153
|
+
config,
|
154
|
+
cache,
|
155
|
+
options['verbose'],
|
156
|
+
options[:debug]
|
157
|
+
)
|
158
|
+
num_failing = sitediff.run
|
159
|
+
exit_code = num_failing.positive? ? 2 : 0
|
160
|
+
|
161
|
+
# Generate HTML report.
|
162
|
+
if options['report-format'] == 'html' || config.export
|
163
|
+
sitediff.report.generate_html(
|
164
|
+
@dir,
|
165
|
+
options['before-report'],
|
166
|
+
options['after-report']
|
167
|
+
)
|
168
|
+
end
|
123
169
|
|
124
|
-
|
125
|
-
|
170
|
+
# Generate JSON report.
|
171
|
+
if options['report-format'] == 'json' && config.export == false
|
172
|
+
sitediff.report.generate_json @dir
|
173
|
+
end
|
174
|
+
|
175
|
+
SiteDiff.log 'Run "sitediff serve" to see a report.' unless options['export']
|
126
176
|
rescue Config::InvalidConfig => e
|
127
177
|
SiteDiff.log "Invalid configuration: #{e.message}", :error
|
128
|
-
SiteDiff.log
|
178
|
+
SiteDiff.log e.backtrace, :error if options[:verbose]
|
179
|
+
rescue Config::ConfigNotFound => e
|
180
|
+
SiteDiff.log "Invalid configuration: #{e.message}", :error
|
181
|
+
SiteDiff.log e.backtrace, :error if options[:verbose]
|
129
182
|
else # no exception was raised
|
130
183
|
# Thor::Error --> exit(1), guaranteed by exit_on_failure?
|
131
184
|
# Failing diff --> exit(2), populated above
|
@@ -140,11 +193,14 @@ class SiteDiff
|
|
140
193
|
type: :boolean,
|
141
194
|
default: true,
|
142
195
|
desc: 'Whether to open the served content in your browser'
|
143
|
-
desc 'serve [OPTIONS]',
|
144
|
-
|
145
|
-
|
146
|
-
|
196
|
+
desc 'serve [OPTIONS] [CONFIG-FILE]',
|
197
|
+
'Serve SiteDiff report directory over HTTP.'
|
198
|
+
##
|
199
|
+
# Serves SiteDiff report for accessing in the browser.
|
200
|
+
def serve(config_file = nil)
|
147
201
|
@dir = get_dir(options['directory'])
|
202
|
+
config = SiteDiff::Config.new(config_file, @dir)
|
203
|
+
|
148
204
|
cache = Cache.new(directory: @dir)
|
149
205
|
cache.read_tags << :before << :after
|
150
206
|
|
@@ -157,85 +213,102 @@ class SiteDiff
|
|
157
213
|
).wait
|
158
214
|
rescue SiteDiffException => e
|
159
215
|
SiteDiff.log e.message, :error
|
160
|
-
SiteDiff.log e.backtrace, :error
|
216
|
+
SiteDiff.log e.backtrace, :error if options[:verbose]
|
161
217
|
end
|
162
218
|
|
163
219
|
option :depth,
|
164
220
|
type: :numeric,
|
165
|
-
default:
|
221
|
+
default: Config::DEFAULT_CONFIG['settings']['depth'],
|
166
222
|
desc: 'How deeply to crawl the given site'
|
167
|
-
option :
|
223
|
+
option :crawl,
|
224
|
+
type: :boolean,
|
225
|
+
default: true,
|
226
|
+
desc: 'Run "sitediff crawl" to discover paths.'
|
227
|
+
option :preset,
|
168
228
|
type: :string,
|
169
|
-
enum:
|
170
|
-
|
171
|
-
desc: 'Whether rules for the site should be auto-created'
|
229
|
+
enum: Config::Preset.all,
|
230
|
+
desc: 'Framework-specific presets to apply.'
|
172
231
|
option :concurrency,
|
173
232
|
type: :numeric,
|
174
|
-
default:
|
175
|
-
desc: 'Max number of concurrent connections made'
|
233
|
+
default: Config::DEFAULT_CONFIG['settings']['concurrency'],
|
234
|
+
desc: 'Max number of concurrent connections made.'
|
235
|
+
option :interval,
|
236
|
+
type: :numeric,
|
237
|
+
default: Config::DEFAULT_CONFIG['settings']['interval'],
|
238
|
+
desc: 'Crawling delay - interval in milliseconds.'
|
176
239
|
option :whitelist,
|
177
240
|
type: :string,
|
178
|
-
default: '',
|
179
|
-
desc: 'Optional whitelist for crawling'
|
241
|
+
default: Config::DEFAULT_CONFIG['settings']['whitelist'],
|
242
|
+
desc: 'Optional whitelist for crawling.'
|
180
243
|
option :blacklist,
|
181
244
|
type: :string,
|
182
|
-
default: '',
|
183
|
-
desc: 'Optional blacklist for crawling'
|
184
|
-
|
245
|
+
default: Config::DEFAULT_CONFIG['settings']['blacklist'],
|
246
|
+
desc: 'Optional blacklist for crawling.'
|
247
|
+
# TODO: Remove this option. Always ignore SSL errors.
|
248
|
+
option :insecure,
|
249
|
+
type: :boolean,
|
250
|
+
default: false,
|
251
|
+
desc: 'Ignore many HTTPS/SSL errors'
|
252
|
+
option :curl_options,
|
253
|
+
type: :hash,
|
254
|
+
default: {},
|
255
|
+
desc: 'Options to be passed to curl'
|
256
|
+
desc 'init URL [URL]', 'Create a sitediff configuration.'
|
257
|
+
##
|
258
|
+
# Initializes a sitediff (yaml) configuration file.
|
185
259
|
def init(*urls)
|
186
260
|
unless (1..2).cover? urls.size
|
187
261
|
SiteDiff.log 'sitediff init requires one or two URLs', :error
|
188
262
|
exit(2)
|
189
263
|
end
|
190
264
|
|
191
|
-
|
192
|
-
check_interval(@interval)
|
265
|
+
# Prepare a config object and write it to the file system.
|
193
266
|
@dir = get_dir(options['directory'])
|
194
|
-
|
195
|
-
@whitelist = create_regexp(options['whitelist'])
|
196
|
-
@blacklist = create_regexp(options['blacklist'])
|
197
|
-
creator = SiteDiff::Config::Creator.new(options[:concurrency],
|
198
|
-
options['interval'],
|
199
|
-
@whitelist,
|
200
|
-
@blacklist,
|
201
|
-
curl_opts,
|
202
|
-
options[:debug],
|
203
|
-
*urls)
|
267
|
+
creator = SiteDiff::Config::Creator.new(options[:debug], *urls)
|
204
268
|
creator.create(
|
205
269
|
depth: options[:depth],
|
206
270
|
directory: @dir,
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
271
|
+
concurrency: options[:concurrency],
|
272
|
+
interval: options[:interval],
|
273
|
+
whitelist: Config.create_regexp(options['whitelist']),
|
274
|
+
blacklist: Config.create_regexp(options['blacklist']),
|
275
|
+
preset: options[:preset],
|
276
|
+
curl_opts: get_curl_opts(options)
|
277
|
+
)
|
213
278
|
SiteDiff.log "Created #{creator.config_file.expand_path}", :success
|
214
|
-
|
279
|
+
|
280
|
+
# Discover paths, if enabled.
|
281
|
+
if options[:crawl]
|
282
|
+
crawl(creator.config_file)
|
283
|
+
SiteDiff.log 'You can now run "sitediff diff".', :success
|
284
|
+
else
|
285
|
+
SiteDiff.log 'Run "sitediff crawl" to discover paths. You should then be able to run "sitediff diff".', :info
|
286
|
+
end
|
215
287
|
end
|
216
288
|
|
217
289
|
option :url,
|
218
290
|
type: :string,
|
219
291
|
desc: 'A custom base URL to fetch from'
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
'Cache the current contents of a site for later comparison'
|
226
|
-
def store(*config_files)
|
292
|
+
desc 'store [CONFIG-FILE]',
|
293
|
+
'Cache the current contents of a site for later comparison.'
|
294
|
+
##
|
295
|
+
# Caches the current version of the site.
|
296
|
+
def store(config_file = nil)
|
227
297
|
@dir = get_dir(options['directory'])
|
228
|
-
config = SiteDiff::Config.new(
|
298
|
+
config = SiteDiff::Config.new(config_file, @dir)
|
299
|
+
# TODO: Figure out how to remove this config.validate call.
|
229
300
|
config.validate(need_before: false)
|
301
|
+
config.paths_file_read
|
302
|
+
|
230
303
|
cache = SiteDiff::Cache.new(directory: @dir, create: true)
|
231
304
|
cache.write_tags << :before
|
232
305
|
|
233
306
|
base = options[:url] || config.after['url']
|
234
307
|
fetcher = SiteDiff::Fetch.new(cache,
|
235
308
|
config.paths,
|
236
|
-
|
237
|
-
|
238
|
-
get_curl_opts(
|
309
|
+
config.setting(:interval),
|
310
|
+
config.setting(:concurrency),
|
311
|
+
get_curl_opts(config.settings),
|
239
312
|
options[:debug],
|
240
313
|
before: base)
|
241
314
|
fetcher.run do |path, _res|
|
@@ -243,11 +316,70 @@ class SiteDiff
|
|
243
316
|
end
|
244
317
|
end
|
245
318
|
|
319
|
+
desc 'crawl [CONFIG-FILE]',
|
320
|
+
'Crawl the "before" site to discover paths.'
|
321
|
+
##
|
322
|
+
# Crawls the "before" site to determine "paths".
|
323
|
+
#
|
324
|
+
# TODO: Move actual crawling to sitediff.crawl(config).
|
325
|
+
# TODO: Switch to paths = sitediff.crawl().
|
326
|
+
def crawl(config_file = nil)
|
327
|
+
# Prepare configuration.
|
328
|
+
@dir = get_dir(options['directory'])
|
329
|
+
@config = SiteDiff::Config.new(config_file, @dir)
|
330
|
+
|
331
|
+
# Prepare cache.
|
332
|
+
@cache = SiteDiff::Cache.new(
|
333
|
+
create: options['cached'] != 'none',
|
334
|
+
directory: @dir
|
335
|
+
)
|
336
|
+
@cache.write_tags << :before << :after
|
337
|
+
|
338
|
+
# Crawl with Hydra to discover paths.
|
339
|
+
hydra = Typhoeus::Hydra.new(
|
340
|
+
max_concurrency: @config.setting(:concurrency)
|
341
|
+
)
|
342
|
+
@paths = {}
|
343
|
+
@config.roots.each do |tag, url|
|
344
|
+
Crawler.new(
|
345
|
+
hydra,
|
346
|
+
url,
|
347
|
+
@config.setting(:interval),
|
348
|
+
@config.setting(:whitelist),
|
349
|
+
@config.setting(:blacklist),
|
350
|
+
@config.setting(:depth),
|
351
|
+
get_curl_opts(@config.settings),
|
352
|
+
@debug
|
353
|
+
) do |info|
|
354
|
+
SiteDiff.log "Visited #{info.uri}, cached."
|
355
|
+
after_crawl(tag, info)
|
356
|
+
end
|
357
|
+
end
|
358
|
+
hydra.run
|
359
|
+
|
360
|
+
# Write paths to a file.
|
361
|
+
@paths = @paths.values.reduce(&:|).to_a.sort
|
362
|
+
@config.paths_file_write(@paths)
|
363
|
+
|
364
|
+
# Log output.
|
365
|
+
file = Pathname.new(@dir) + Config::DEFAULT_PATHS_FILENAME
|
366
|
+
SiteDiff.log ''
|
367
|
+
SiteDiff.log "#{@paths.length} page(s) found."
|
368
|
+
SiteDiff.log "Created #{file.expand_path}.", :success, 'done'
|
369
|
+
end
|
370
|
+
|
246
371
|
no_commands do
|
372
|
+
# Generates CURL options.
|
373
|
+
#
|
374
|
+
# TODO: This should be in the config class instead.
|
375
|
+
# TODO: Make all requests insecure and avoid custom curl-opts.
|
247
376
|
def get_curl_opts(options)
|
248
377
|
# We do want string keys here
|
249
378
|
bool_hash = { 'true' => true, 'false' => false }
|
250
|
-
curl_opts = UriWrapper::DEFAULT_CURL_OPTS
|
379
|
+
curl_opts = UriWrapper::DEFAULT_CURL_OPTS
|
380
|
+
.clone
|
381
|
+
.merge(options['curl_options'] || {})
|
382
|
+
.merge(options['curl_opts'] || {})
|
251
383
|
curl_opts.each { |k, v| curl_opts[k] = bool_hash.fetch(v, v) }
|
252
384
|
if options[:insecure]
|
253
385
|
curl_opts[:ssl_verifypeer] = false
|
@@ -256,13 +388,8 @@ class SiteDiff
|
|
256
388
|
curl_opts
|
257
389
|
end
|
258
390
|
|
259
|
-
|
260
|
-
|
261
|
-
SiteDiff.log '--concurrency must be set to 1 in order to enable the interval feature'
|
262
|
-
exit(2)
|
263
|
-
end
|
264
|
-
end
|
265
|
-
|
391
|
+
##
|
392
|
+
# Ensures that the given directory exists.
|
266
393
|
def get_dir(directory)
|
267
394
|
# Create the dir. Must go before cache initialization!
|
268
395
|
@dir = Pathname.new(directory || '.')
|
@@ -270,16 +397,24 @@ class SiteDiff
|
|
270
397
|
@dir.to_s
|
271
398
|
end
|
272
399
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
400
|
+
##
|
401
|
+
# Processes a crawled path.
|
402
|
+
def after_crawl(tag, info)
|
403
|
+
path = UriWrapper.canonicalize(info.relative)
|
404
|
+
|
405
|
+
# Register the path.
|
406
|
+
@paths[tag] = [] unless @paths[tag]
|
407
|
+
@paths[tag] << path
|
408
|
+
|
409
|
+
result = info.read_result
|
410
|
+
|
411
|
+
# Write result to applicable cache.
|
412
|
+
@cache.set(tag, path, result)
|
413
|
+
# If single-site, cache "after" as "before".
|
414
|
+
@cache.set(:before, path, result) unless @config.roots[:before]
|
415
|
+
|
416
|
+
# TODO: Restore application of rules.
|
417
|
+
# @rules.handle_page(tag, res.content, info.document) if @rules && !res.error
|
283
418
|
end
|
284
419
|
end
|
285
420
|
end
|