sitediff 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.eslintignore +1 -0
- data/.eslintrc.json +28 -0
- data/.project +11 -0
- data/.rubocop.yml +179 -0
- data/.rubocop_todo.yml +51 -0
- data/CHANGELOG.md +28 -0
- data/Dockerfile +33 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +85 -0
- data/INSTALLATION.md +146 -0
- data/LICENSE +339 -0
- data/README.md +810 -0
- data/Rakefile +12 -0
- data/Thorfile +135 -0
- data/config/.gitkeep +0 -0
- data/config/sanitize_domains.example.yaml +8 -0
- data/config/sitediff.example.yaml +81 -0
- data/docker-compose.test.yml +3 -0
- data/lib/sitediff/api.rb +17 -6
- data/lib/sitediff/cache.rb +5 -3
- data/lib/sitediff/cli.rb +4 -3
- data/lib/sitediff/config/creator.rb +13 -13
- data/lib/sitediff/config/preset.rb +6 -6
- data/lib/sitediff/config.rb +9 -9
- data/lib/sitediff/crawler.rb +12 -2
- data/lib/sitediff/diff.rb +1 -1
- data/lib/sitediff/fetch.rb +2 -2
- data/lib/sitediff/files/report.html.erb +1 -1
- data/lib/sitediff/presets/drupal.yaml +63 -0
- data/lib/sitediff/report.rb +6 -6
- data/lib/sitediff/result.rb +5 -5
- data/lib/sitediff/sanitize/dom_transform.rb +2 -2
- data/lib/sitediff/sanitize/regexp.rb +2 -2
- data/lib/sitediff/sanitize.rb +5 -5
- data/lib/sitediff/uriwrapper.rb +8 -10
- data/lib/sitediff/webserver/resultserver.rb +2 -0
- data/lib/sitediff/webserver.rb +3 -0
- data/lib/sitediff.rb +9 -9
- data/misc/sitediff - overview report.png +0 -0
- data/misc/sitediff - page report.png +0 -0
- data/package-lock.json +878 -0
- data/package.json +25 -0
- data/sitediff.gemspec +51 -0
- metadata +62 -18
data/lib/sitediff/report.rb
CHANGED
@@ -96,7 +96,7 @@ class SiteDiff
|
|
96
96
|
if @config.export
|
97
97
|
package_report(dir)
|
98
98
|
else
|
99
|
-
SiteDiff.log
|
99
|
+
SiteDiff.log "Report generated to #{report_file.expand_path}"
|
100
100
|
end
|
101
101
|
end
|
102
102
|
|
@@ -135,7 +135,7 @@ class SiteDiff
|
|
135
135
|
|
136
136
|
write_settings dir
|
137
137
|
|
138
|
-
SiteDiff.log
|
138
|
+
SiteDiff.log "Report generated to #{report_file.expand_path}"
|
139
139
|
end
|
140
140
|
|
141
141
|
##
|
@@ -147,7 +147,7 @@ class SiteDiff
|
|
147
147
|
temp_path.mkpath
|
148
148
|
report_path = temp_path + REPORT_DIR
|
149
149
|
report_path.mkpath
|
150
|
-
files_path = report_path
|
150
|
+
files_path = "#{report_path}files"
|
151
151
|
files_path.mkpath
|
152
152
|
diffs_path = dir + DIFFS_DIR
|
153
153
|
|
@@ -164,7 +164,7 @@ class SiteDiff
|
|
164
164
|
end
|
165
165
|
FileUtils.move(temp_path + REPORT_FILE_TAR, dir)
|
166
166
|
temp_path.rmtree
|
167
|
-
SiteDiff.log
|
167
|
+
SiteDiff.log "Archived report generated to #{dir.join(REPORT_FILE_TAR)}"
|
168
168
|
end
|
169
169
|
|
170
170
|
##
|
@@ -182,7 +182,7 @@ class SiteDiff
|
|
182
182
|
diff_dir.rmtree if diff_dir.exist?
|
183
183
|
|
184
184
|
# Write diffs to the diff directory.
|
185
|
-
@results.each { |r| r.dump(dir, @config.export) if r.status == Result::STATUS_FAILURE }
|
185
|
+
@results.each { |r| r.dump(dir, relative: @config.export) if r.status == Result::STATUS_FAILURE }
|
186
186
|
SiteDiff.log "All diff files written to #{diff_dir.expand_path}" unless @config.export
|
187
187
|
end
|
188
188
|
|
@@ -245,7 +245,7 @@ class SiteDiff
|
|
245
245
|
if File.exist? timestamp_file
|
246
246
|
file = File::Stat.new(timestamp_file)
|
247
247
|
time = file.mtime
|
248
|
-
time.
|
248
|
+
time.instance_of?(Time) ? time.strftime('%Y-%m-%d %H:%M') : ''
|
249
249
|
else
|
250
250
|
'unknown'
|
251
251
|
end
|
data/lib/sitediff/result.rb
CHANGED
@@ -76,19 +76,19 @@ class SiteDiff
|
|
76
76
|
|
77
77
|
# Filename to store diff
|
78
78
|
def filename
|
79
|
-
File.join(Report::DIFFS_DIR, Digest::SHA1.hexdigest(path)
|
79
|
+
File.join(Report::DIFFS_DIR, "#{Digest::SHA1.hexdigest(path)}.html")
|
80
80
|
end
|
81
81
|
|
82
82
|
# Returns a URL to the result diff.
|
83
83
|
#
|
84
84
|
# Returns nil if the result has no diffs.
|
85
|
-
def diff_url(relative
|
85
|
+
def diff_url(relative: false)
|
86
86
|
prefix = relative ? 'files/' : '/files/'
|
87
87
|
return prefix + filename if status == STATUS_FAILURE
|
88
88
|
end
|
89
89
|
|
90
90
|
# Log the result to the terminal
|
91
|
-
def log(verbose
|
91
|
+
def log(verbose: true)
|
92
92
|
case status
|
93
93
|
when STATUS_SUCCESS
|
94
94
|
SiteDiff.log path, :success, 'UNCHANGED'
|
@@ -101,12 +101,12 @@ class SiteDiff
|
|
101
101
|
end
|
102
102
|
|
103
103
|
# Dump the result to a file
|
104
|
-
def dump(dir, relative
|
104
|
+
def dump(dir, relative: false)
|
105
105
|
dump_path = File.join(dir, filename)
|
106
106
|
base = File.dirname(dump_path)
|
107
107
|
FileUtils.mkdir_p(base) unless File.exist?(base)
|
108
108
|
File.open(dump_path, 'w') do |f|
|
109
|
-
f.write(Diff.generate_diff_output(self, relative))
|
109
|
+
f.write(Diff.generate_diff_output(self, relative:))
|
110
110
|
end
|
111
111
|
end
|
112
112
|
end
|
@@ -30,10 +30,10 @@ class SiteDiff
|
|
30
30
|
|
31
31
|
##
|
32
32
|
# TODO: Document what this method does.
|
33
|
-
def targets(node)
|
33
|
+
def targets(node, &block)
|
34
34
|
selectors = to_array(@rule['selector'])
|
35
35
|
selectors.each do |sel|
|
36
|
-
node.css(sel).each
|
36
|
+
node.css(sel).each(&block)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
data/lib/sitediff/sanitize.rb
CHANGED
@@ -62,7 +62,7 @@ class SiteDiff
|
|
62
62
|
(rules = @config[name]) || (return nil)
|
63
63
|
|
64
64
|
# Already an array? Do nothing.
|
65
|
-
if rules[0]
|
65
|
+
if rules[0].respond_to?('each') && rules[0]&.fetch('value')
|
66
66
|
# If it is a hash, put it in an array.
|
67
67
|
elsif rules['value']
|
68
68
|
rules = [rules]
|
@@ -146,7 +146,7 @@ class SiteDiff
|
|
146
146
|
def select_regions(node, regions, output)
|
147
147
|
regions = output.map do |name|
|
148
148
|
selector = get_named_region(regions, name)['selector']
|
149
|
-
region = Nokogiri::XML.fragment(
|
149
|
+
region = Nokogiri::XML.fragment("<region id=\"#{name}\"></region>").at_css('region')
|
150
150
|
matching = node.css(selector)
|
151
151
|
matching.each { |m| region.add_child m }
|
152
152
|
region
|
@@ -210,7 +210,7 @@ class SiteDiff
|
|
210
210
|
end
|
211
211
|
|
212
212
|
# Parse HTML into a node
|
213
|
-
def self.domify(str, force_doc
|
213
|
+
def self.domify(str, force_doc: false)
|
214
214
|
if force_doc || /<!DOCTYPE/.match(str[0, 512])
|
215
215
|
Nokogiri::HTML(str)
|
216
216
|
else
|
@@ -224,9 +224,9 @@ class SiteDiff
|
|
224
224
|
obj
|
225
225
|
# node or fragment
|
226
226
|
elsif Nokogiri::XML::Node == obj.class || Nokogiri::HTML::DocumentFragment == obj.class
|
227
|
-
domify(obj.to_s, true)
|
227
|
+
domify(obj.to_s, force_doc: true)
|
228
228
|
else
|
229
|
-
to_document(domify(obj, false))
|
229
|
+
to_document(domify(obj, force_doc: false))
|
230
230
|
end
|
231
231
|
end
|
232
232
|
|
data/lib/sitediff/uriwrapper.rb
CHANGED
@@ -48,7 +48,7 @@ class SiteDiff
|
|
48
48
|
|
49
49
|
##
|
50
50
|
# Creates a UriWrapper.
|
51
|
-
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug
|
51
|
+
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true)
|
52
52
|
@uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
|
53
53
|
# remove trailing '/'s from local URIs
|
54
54
|
@uri.path.gsub!(%r{/*$}, '') if local?
|
@@ -103,10 +103,9 @@ class SiteDiff
|
|
103
103
|
# Returns the encoding of an HTTP response from headers , nil if not
|
104
104
|
# specified.
|
105
105
|
def charset_encoding(http_headers)
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
end
|
106
|
+
content_type = http_headers['Content-Type']
|
107
|
+
if (md = /;\s*charset=([-\w]*)/.match(content_type))
|
108
|
+
md[1]
|
110
109
|
end
|
111
110
|
end
|
112
111
|
|
@@ -117,7 +116,7 @@ class SiteDiff
|
|
117
116
|
def typhoeus_request
|
118
117
|
params = @curl_opts.dup
|
119
118
|
# Allow basic auth
|
120
|
-
params[:userpwd] = @uri.user
|
119
|
+
params[:userpwd] = "#{@uri.user}: #{@uri.password}" if @uri.user
|
121
120
|
|
122
121
|
req = Typhoeus::Request.new(to_s, params)
|
123
122
|
|
@@ -150,19 +149,18 @@ class SiteDiff
|
|
150
149
|
|
151
150
|
req.on_failure do |resp|
|
152
151
|
if resp&.status_message
|
153
|
-
msg = resp.status_message
|
154
152
|
yield ReadResult.error(
|
155
|
-
"HTTP error when loading #{@uri}: #{
|
153
|
+
"HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
|
156
154
|
resp.response_code
|
157
155
|
)
|
158
156
|
elsif (msg = resp.options[:return_code])
|
159
157
|
yield ReadResult.error(
|
160
|
-
"Connection error when loading #{@uri}: #{msg}",
|
158
|
+
"Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{resp.status_message} #{msg}",
|
161
159
|
resp.response_code
|
162
160
|
)
|
163
161
|
else
|
164
162
|
yield ReadResult.error(
|
165
|
-
"Unknown error when loading #{@uri}: #{
|
163
|
+
"Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
|
166
164
|
resp.response_code
|
167
165
|
)
|
168
166
|
end
|
@@ -14,6 +14,7 @@ class SiteDiff
|
|
14
14
|
##
|
15
15
|
# Creates a Cache Servlet.
|
16
16
|
def initialize(_server, cache)
|
17
|
+
super
|
17
18
|
@cache = cache
|
18
19
|
end
|
19
20
|
|
@@ -41,6 +42,7 @@ class SiteDiff
|
|
41
42
|
##
|
42
43
|
# Creates a Side By Side Servlet.
|
43
44
|
def initialize(_server, cache, settings)
|
45
|
+
super
|
44
46
|
@cache = cache
|
45
47
|
@settings = settings
|
46
48
|
end
|
data/lib/sitediff/webserver.rb
CHANGED
@@ -77,15 +77,18 @@ class SiteDiff
|
|
77
77
|
BASE = 'spec/sites/ruby-doc.org'
|
78
78
|
NAMES = %w[core-1.9.3 core-2.0].freeze
|
79
79
|
|
80
|
+
# Initialize web server.
|
80
81
|
def initialize(port = PORT, base = BASE, names = NAMES)
|
81
82
|
dirs = names.map { |n| File.join(base, n) }
|
82
83
|
super(port, dirs, quiet: true)
|
83
84
|
end
|
84
85
|
|
86
|
+
# Get the before site uri.
|
85
87
|
def before
|
86
88
|
uris.first
|
87
89
|
end
|
88
90
|
|
91
|
+
# Get the after site uri.
|
89
92
|
def after
|
90
93
|
uris.last
|
91
94
|
end
|
data/lib/sitediff.rb
CHANGED
@@ -47,7 +47,7 @@ class SiteDiff
|
|
47
47
|
bg = :yellow
|
48
48
|
end
|
49
49
|
|
50
|
-
label =
|
50
|
+
label = "[#{label}]"
|
51
51
|
label = Rainbow(label)
|
52
52
|
label = label.bg(bg) if bg
|
53
53
|
label = label.fg(fg) if fg
|
@@ -76,7 +76,7 @@ class SiteDiff
|
|
76
76
|
end
|
77
77
|
|
78
78
|
# Initialize SiteDiff.
|
79
|
-
def initialize(config, cache, verbose
|
79
|
+
def initialize(config, cache, verbose: true, debug: false)
|
80
80
|
@cache = cache
|
81
81
|
@verbose = verbose
|
82
82
|
@debug = debug
|
@@ -97,7 +97,7 @@ class SiteDiff
|
|
97
97
|
end
|
98
98
|
|
99
99
|
# Sanitize HTML.
|
100
|
-
def sanitize(
|
100
|
+
def sanitize(path_passed, read_results)
|
101
101
|
%i[before after].map do |tag|
|
102
102
|
html = read_results[tag].content
|
103
103
|
# TODO: See why encoding is empty while running tests.
|
@@ -107,8 +107,8 @@ class SiteDiff
|
|
107
107
|
# during rspec tests for some reason.
|
108
108
|
encoding = read_results[tag].encoding
|
109
109
|
if encoding || html.length.positive?
|
110
|
-
section = @config.send(tag, true)
|
111
|
-
opts = { path:
|
110
|
+
section = @config.send(tag, apply_preset: true)
|
111
|
+
opts = { path: path_passed }
|
112
112
|
opts[:output] = @config.output if @config.output
|
113
113
|
Sanitizer.new(html, section, opts).sanitize
|
114
114
|
else
|
@@ -144,7 +144,7 @@ class SiteDiff
|
|
144
144
|
|
145
145
|
# Print results in order!
|
146
146
|
while (next_diff = @results[@ordered.first])
|
147
|
-
next_diff.log(@verbose)
|
147
|
+
next_diff.log(verbose: @verbose)
|
148
148
|
@ordered.shift
|
149
149
|
end
|
150
150
|
end
|
@@ -160,7 +160,7 @@ class SiteDiff
|
|
160
160
|
@ordered = @config.paths.dup
|
161
161
|
|
162
162
|
unless @cache.read_tags.empty?
|
163
|
-
SiteDiff.log(
|
163
|
+
SiteDiff.log("Using sites from cache: #{@cache.read_tags.sort.join(', ')}")
|
164
164
|
end
|
165
165
|
|
166
166
|
# TODO: Fix this after config merge refactor!
|
@@ -175,7 +175,7 @@ class SiteDiff
|
|
175
175
|
@config.setting(:interval),
|
176
176
|
@config.setting(:concurrency),
|
177
177
|
curl_opts,
|
178
|
-
@debug,
|
178
|
+
debug: @debug,
|
179
179
|
before: @config.before_url,
|
180
180
|
after: @config.after_url
|
181
181
|
)
|
@@ -203,7 +203,7 @@ class SiteDiff
|
|
203
203
|
##
|
204
204
|
# Get SiteDiff gemspec.
|
205
205
|
def self.gemspec
|
206
|
-
file = ROOT_DIR
|
206
|
+
file = "#{ROOT_DIR}/sitediff.gemspec"
|
207
207
|
Gem::Specification.load(file)
|
208
208
|
end
|
209
209
|
|
Binary file
|
Binary file
|