sitediff 0.0.6 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.eslintignore +1 -0
- data/.eslintrc.json +28 -0
- data/.project +11 -0
- data/.rubocop.yml +179 -0
- data/.rubocop_todo.yml +51 -0
- data/CHANGELOG.md +28 -0
- data/Dockerfile +33 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +85 -0
- data/INSTALLATION.md +146 -0
- data/LICENSE +339 -0
- data/README.md +810 -0
- data/Rakefile +12 -0
- data/Thorfile +135 -0
- data/bin/sitediff +9 -2
- data/config/.gitkeep +0 -0
- data/config/sanitize_domains.example.yaml +8 -0
- data/config/sitediff.example.yaml +81 -0
- data/docker-compose.test.yml +3 -0
- data/lib/sitediff/api.rb +276 -0
- data/lib/sitediff/cache.rb +57 -8
- data/lib/sitediff/cli.rb +156 -176
- data/lib/sitediff/config/creator.rb +61 -77
- data/lib/sitediff/config/preset.rb +75 -0
- data/lib/sitediff/config.rb +436 -31
- data/lib/sitediff/crawler.rb +27 -21
- data/lib/sitediff/diff.rb +32 -9
- data/lib/sitediff/fetch.rb +10 -3
- data/lib/sitediff/files/diff.html.erb +20 -2
- data/lib/sitediff/files/jquery.min.js +2 -0
- data/lib/sitediff/files/normalize.css +349 -0
- data/lib/sitediff/files/report.html.erb +171 -0
- data/lib/sitediff/files/sidebyside.html.erb +5 -2
- data/lib/sitediff/files/sitediff.css +303 -30
- data/lib/sitediff/files/sitediff.js +367 -0
- data/lib/sitediff/presets/drupal.yaml +63 -0
- data/lib/sitediff/report.rb +254 -0
- data/lib/sitediff/result.rb +50 -20
- data/lib/sitediff/sanitize/dom_transform.rb +47 -8
- data/lib/sitediff/sanitize/regexp.rb +24 -3
- data/lib/sitediff/sanitize.rb +81 -12
- data/lib/sitediff/uriwrapper.rb +65 -23
- data/lib/sitediff/webserver/resultserver.rb +30 -33
- data/lib/sitediff/webserver.rb +15 -3
- data/lib/sitediff.rb +130 -83
- data/misc/sitediff - overview report.png +0 -0
- data/misc/sitediff - page report.png +0 -0
- data/package-lock.json +878 -0
- data/package.json +25 -0
- data/sitediff.gemspec +51 -0
- metadata +91 -29
- data/lib/sitediff/files/html_report.html.erb +0 -66
- data/lib/sitediff/files/rules/drupal.yaml +0 -63
- data/lib/sitediff/rules.rb +0 -65
data/lib/sitediff.rb
CHANGED
@@ -2,63 +2,85 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
4
|
require 'sitediff/config'
|
5
|
+
require 'sitediff/diff'
|
5
6
|
require 'sitediff/fetch'
|
6
7
|
require 'sitediff/result'
|
8
|
+
require 'sitediff/report'
|
7
9
|
require 'pathname'
|
8
10
|
require 'rainbow'
|
11
|
+
require 'rubygems'
|
9
12
|
require 'yaml'
|
10
13
|
|
14
|
+
# SiteDiff Object.
|
11
15
|
class SiteDiff
|
12
|
-
|
16
|
+
attr_reader :config, :results
|
17
|
+
|
18
|
+
# SiteDiff installation directory.
|
19
|
+
ROOT_DIR = File.dirname(File.dirname(__FILE__))
|
20
|
+
|
21
|
+
# Path to misc files. Ex: *.erb, *.css.
|
13
22
|
FILES_DIR = File.join(File.dirname(__FILE__), 'sitediff', 'files')
|
14
23
|
|
15
|
-
#
|
16
|
-
|
17
|
-
|
18
|
-
#
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
bg = fg = nil
|
28
|
-
case type
|
29
|
-
when :info
|
30
|
-
bg = fg = nil
|
31
|
-
when :diff_success
|
32
|
-
bg = :green
|
24
|
+
# Logs a message.
|
25
|
+
#
|
26
|
+
# Label will be colorized and message will not.
|
27
|
+
# Type dictates the color: :info, :success, :error, or :warning (any other type is blue).
|
28
|
+
#
|
29
|
+
# TODO: Only print :debug messages in debug mode.
|
30
|
+
def self.log(message, type = :info, label = nil)
|
31
|
+
# Prepare label.
|
32
|
+
label ||= type unless type == :info
|
33
|
+
label = label.to_s
|
34
|
+
unless label.empty?
|
35
|
+
# Colorize label.
|
33
36
|
fg = :black
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
37
|
+
bg = :blue
|
38
|
+
|
39
|
+
case type
|
40
|
+
when :info
|
41
|
+
bg = :cyan
|
42
|
+
when :success
|
43
|
+
bg = :green
|
44
|
+
when :error
|
45
|
+
bg = :red
|
46
|
+
when :warning
|
47
|
+
bg = :yellow
|
48
|
+
end
|
49
|
+
|
50
|
+
label = "[#{label}]"
|
51
|
+
label = Rainbow(label)
|
52
|
+
label = label.bg(bg) if bg
|
53
|
+
label = label.fg(fg) if fg
|
54
|
+
|
55
|
+
# Add a space after the label.
|
56
|
+
label += ' '
|
41
57
|
end
|
42
|
-
|
43
|
-
|
44
|
-
label = label.fg(fg) if fg
|
45
|
-
puts label + ' ' + str
|
58
|
+
|
59
|
+
puts label + message
|
46
60
|
end
|
47
61
|
|
48
|
-
|
62
|
+
##
|
63
|
+
# Returns the "before" site's URL.
|
64
|
+
#
|
65
|
+
# TODO: Remove in favor of config.before_url.
|
49
66
|
def before
|
50
67
|
@config.before['url']
|
51
68
|
end
|
52
69
|
|
70
|
+
##
|
71
|
+
# Returns the "after" site's URL.
|
72
|
+
#
|
73
|
+
# TODO: Remove in favor of config.after_url.
|
53
74
|
def after
|
54
75
|
@config.after['url']
|
55
76
|
end
|
56
77
|
|
57
|
-
|
78
|
+
# Initialize SiteDiff.
|
79
|
+
def initialize(config, cache, verbose: true, debug: false)
|
58
80
|
@cache = cache
|
59
81
|
@verbose = verbose
|
60
82
|
@debug = debug
|
61
|
-
|
83
|
+
|
62
84
|
# Check for single-site mode
|
63
85
|
validate_opts = {}
|
64
86
|
if !config.before['url'] && @cache.tag?(:before)
|
@@ -69,37 +91,50 @@ class SiteDiff
|
|
69
91
|
validate_opts[:need_before] = false
|
70
92
|
end
|
71
93
|
config.validate(validate_opts)
|
72
|
-
|
73
|
-
|
94
|
+
# Configure diff.
|
95
|
+
Diff.diff_config(config)
|
74
96
|
@config = config
|
75
97
|
end
|
76
98
|
|
77
|
-
# Sanitize HTML
|
78
|
-
def sanitize(
|
99
|
+
# Sanitize HTML.
|
100
|
+
def sanitize(path_passed, read_results)
|
79
101
|
%i[before after].map do |tag|
|
80
102
|
html = read_results[tag].content
|
103
|
+
# TODO: See why encoding is empty while running tests.
|
104
|
+
#
|
105
|
+
# The presence of an "encoding" value used to be used to determine
|
106
|
+
# if the sanitizer would be called. However, encoding turns up blank
|
107
|
+
# during rspec tests for some reason.
|
81
108
|
encoding = read_results[tag].encoding
|
82
|
-
if encoding
|
83
|
-
|
84
|
-
|
109
|
+
if encoding || html.length.positive?
|
110
|
+
section = @config.send(tag, apply_preset: true)
|
111
|
+
opts = { path: path_passed }
|
112
|
+
opts[:output] = @config.output if @config.output
|
113
|
+
Sanitizer.new(html, section, opts).sanitize
|
85
114
|
else
|
86
115
|
html
|
87
116
|
end
|
88
117
|
end
|
89
118
|
end
|
90
119
|
|
91
|
-
|
120
|
+
##
|
121
|
+
# Process a set of read results.
|
122
|
+
#
|
123
|
+
# This is the callback that processes items fetched by the Fetcher.
|
92
124
|
def process_results(path, read_results)
|
93
|
-
|
125
|
+
error = (read_results[:before].error || read_results[:after].error)
|
126
|
+
if error
|
94
127
|
diff = Result.new(path, nil, nil, nil, nil, error)
|
95
128
|
else
|
96
129
|
begin
|
97
|
-
diff = Result.new(
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
130
|
+
diff = Result.new(
|
131
|
+
path,
|
132
|
+
*sanitize(path, read_results),
|
133
|
+
read_results[:before].encoding,
|
134
|
+
read_results[:after].encoding,
|
135
|
+
nil
|
136
|
+
)
|
137
|
+
rescue StandardError => e
|
103
138
|
raise if @debug
|
104
139
|
|
105
140
|
Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
|
@@ -109,65 +144,77 @@ class SiteDiff
|
|
109
144
|
|
110
145
|
# Print results in order!
|
111
146
|
while (next_diff = @results[@ordered.first])
|
112
|
-
next_diff.log(@verbose)
|
147
|
+
next_diff.log(verbose: @verbose)
|
113
148
|
@ordered.shift
|
114
149
|
end
|
115
150
|
end
|
116
151
|
|
117
|
-
|
118
|
-
#
|
119
|
-
|
152
|
+
##
|
153
|
+
# Compute diff as per config.
|
154
|
+
#
|
155
|
+
# @return [Integer]
|
156
|
+
# Number of paths which have diffs.
|
157
|
+
def run
|
120
158
|
# Map of path -> Result object, populated by process_results
|
121
159
|
@results = {}
|
122
160
|
@ordered = @config.paths.dup
|
123
161
|
|
124
162
|
unless @cache.read_tags.empty?
|
125
|
-
SiteDiff.log(
|
126
|
-
@cache.read_tags.sort.join(', '))
|
163
|
+
SiteDiff.log("Using sites from cache: #{@cache.read_tags.sort.join(', ')}")
|
127
164
|
end
|
128
165
|
|
129
166
|
# TODO: Fix this after config merge refactor!
|
130
167
|
# Not quite right. We are not passing @config.before or @config.after
|
131
168
|
# so passing this instead but @config.after['curl_opts'] is ignored.
|
169
|
+
curl_opts = @config.setting :curl_opts
|
132
170
|
config_curl_opts = @config.before['curl_opts']
|
133
171
|
curl_opts = config_curl_opts.clone.merge(curl_opts) if config_curl_opts
|
134
|
-
fetcher = Fetch.new(
|
135
|
-
|
172
|
+
fetcher = Fetch.new(
|
173
|
+
@cache,
|
174
|
+
@config.paths,
|
175
|
+
@config.setting(:interval),
|
176
|
+
@config.setting(:concurrency),
|
177
|
+
curl_opts,
|
178
|
+
debug: @debug,
|
179
|
+
before: @config.before_url,
|
180
|
+
after: @config.after_url
|
181
|
+
)
|
182
|
+
|
183
|
+
# Run the Fetcher with "process results" as a callback.
|
136
184
|
fetcher.run(&method(:process_results))
|
137
185
|
|
138
186
|
# Order by original path order
|
139
|
-
@results = @config.paths.map { |
|
187
|
+
@results = @config.paths.map { |path| @results[path] }
|
140
188
|
results.map { |r| r unless r.success? }.compact.length
|
141
189
|
end
|
142
190
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
# store diffs of each failing case, first wipe out existing diffs
|
151
|
-
diff_dir = dir + DIFFS_DIR
|
152
|
-
diff_dir.rmtree if diff_dir.exist?
|
153
|
-
results.each { |r| r.dump(dir) if r.status == Result::STATUS_FAILURE }
|
154
|
-
SiteDiff.log "All diff files were dumped inside #{dir.expand_path}"
|
155
|
-
|
156
|
-
# store failing paths
|
157
|
-
failures = dir + FAILURES_FILE
|
158
|
-
SiteDiff.log "Writing failures to #{failures.expand_path}"
|
159
|
-
failures.open('w') do |f|
|
160
|
-
results.each { |r| f.puts r.path unless r.success? }
|
191
|
+
##
|
192
|
+
# Get a reporter object to help with report generation.
|
193
|
+
def report
|
194
|
+
if @results.nil?
|
195
|
+
raise SiteDiffException(
|
196
|
+
'No results detected. Run SiteDiff.run before SiteDiff.report.'
|
197
|
+
)
|
161
198
|
end
|
162
199
|
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
200
|
+
Report.new(@config, @cache, @results)
|
201
|
+
end
|
202
|
+
|
203
|
+
##
|
204
|
+
# Get SiteDiff gemspec.
|
205
|
+
def self.gemspec
|
206
|
+
file = "#{ROOT_DIR}/sitediff.gemspec"
|
207
|
+
Gem::Specification.load(file)
|
208
|
+
end
|
167
209
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
210
|
+
##
|
211
|
+
# Ensures that a directory exists and returns a Pathname for it.
|
212
|
+
#
|
213
|
+
# @param [String] dir
|
214
|
+
# path/to/directory
|
215
|
+
def self.ensure_dir(dir)
|
216
|
+
dir = Pathname.new(dir) unless dir.is_a? Pathname
|
217
|
+
dir.mkpath unless dir.directory?
|
218
|
+
dir
|
172
219
|
end
|
173
220
|
end
|
Binary file
|
Binary file
|