sitediff 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.eslintignore +1 -0
- data/.eslintrc.json +28 -0
- data/.project +11 -0
- data/.rubocop.yml +179 -0
- data/.rubocop_todo.yml +51 -0
- data/CHANGELOG.md +28 -0
- data/Dockerfile +33 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +85 -0
- data/INSTALLATION.md +146 -0
- data/LICENSE +339 -0
- data/README.md +810 -0
- data/Rakefile +12 -0
- data/Thorfile +135 -0
- data/config/.gitkeep +0 -0
- data/config/sanitize_domains.example.yaml +8 -0
- data/config/sitediff.example.yaml +81 -0
- data/docker-compose.test.yml +3 -0
- data/lib/sitediff/api.rb +17 -6
- data/lib/sitediff/cache.rb +5 -3
- data/lib/sitediff/cli.rb +4 -3
- data/lib/sitediff/config/creator.rb +13 -13
- data/lib/sitediff/config/preset.rb +6 -6
- data/lib/sitediff/config.rb +9 -9
- data/lib/sitediff/crawler.rb +12 -2
- data/lib/sitediff/diff.rb +1 -1
- data/lib/sitediff/fetch.rb +2 -2
- data/lib/sitediff/files/report.html.erb +1 -1
- data/lib/sitediff/presets/drupal.yaml +63 -0
- data/lib/sitediff/report.rb +6 -6
- data/lib/sitediff/result.rb +5 -5
- data/lib/sitediff/sanitize/dom_transform.rb +2 -2
- data/lib/sitediff/sanitize/regexp.rb +2 -2
- data/lib/sitediff/sanitize.rb +5 -5
- data/lib/sitediff/uriwrapper.rb +8 -10
- data/lib/sitediff/webserver/resultserver.rb +2 -0
- data/lib/sitediff/webserver.rb +3 -0
- data/lib/sitediff.rb +9 -9
- data/misc/sitediff - overview report.png +0 -0
- data/misc/sitediff - page report.png +0 -0
- data/package-lock.json +878 -0
- data/package.json +25 -0
- data/sitediff.gemspec +51 -0
- metadata +62 -18
data/lib/sitediff/report.rb
CHANGED
@@ -96,7 +96,7 @@ class SiteDiff
|
|
96
96
|
if @config.export
|
97
97
|
package_report(dir)
|
98
98
|
else
|
99
|
-
SiteDiff.log
|
99
|
+
SiteDiff.log "Report generated to #{report_file.expand_path}"
|
100
100
|
end
|
101
101
|
end
|
102
102
|
|
@@ -135,7 +135,7 @@ class SiteDiff
|
|
135
135
|
|
136
136
|
write_settings dir
|
137
137
|
|
138
|
-
SiteDiff.log
|
138
|
+
SiteDiff.log "Report generated to #{report_file.expand_path}"
|
139
139
|
end
|
140
140
|
|
141
141
|
##
|
@@ -147,7 +147,7 @@ class SiteDiff
|
|
147
147
|
temp_path.mkpath
|
148
148
|
report_path = temp_path + REPORT_DIR
|
149
149
|
report_path.mkpath
|
150
|
-
files_path = report_path
|
150
|
+
files_path = "#{report_path}files"
|
151
151
|
files_path.mkpath
|
152
152
|
diffs_path = dir + DIFFS_DIR
|
153
153
|
|
@@ -164,7 +164,7 @@ class SiteDiff
|
|
164
164
|
end
|
165
165
|
FileUtils.move(temp_path + REPORT_FILE_TAR, dir)
|
166
166
|
temp_path.rmtree
|
167
|
-
SiteDiff.log
|
167
|
+
SiteDiff.log "Archived report generated to #{dir.join(REPORT_FILE_TAR)}"
|
168
168
|
end
|
169
169
|
|
170
170
|
##
|
@@ -182,7 +182,7 @@ class SiteDiff
|
|
182
182
|
diff_dir.rmtree if diff_dir.exist?
|
183
183
|
|
184
184
|
# Write diffs to the diff directory.
|
185
|
-
@results.each { |r| r.dump(dir, @config.export) if r.status == Result::STATUS_FAILURE }
|
185
|
+
@results.each { |r| r.dump(dir, relative: @config.export) if r.status == Result::STATUS_FAILURE }
|
186
186
|
SiteDiff.log "All diff files written to #{diff_dir.expand_path}" unless @config.export
|
187
187
|
end
|
188
188
|
|
@@ -245,7 +245,7 @@ class SiteDiff
|
|
245
245
|
if File.exist? timestamp_file
|
246
246
|
file = File::Stat.new(timestamp_file)
|
247
247
|
time = file.mtime
|
248
|
-
time.
|
248
|
+
time.instance_of?(Time) ? time.strftime('%Y-%m-%d %H:%M') : ''
|
249
249
|
else
|
250
250
|
'unknown'
|
251
251
|
end
|
data/lib/sitediff/result.rb
CHANGED
@@ -76,19 +76,19 @@ class SiteDiff
|
|
76
76
|
|
77
77
|
# Filename to store diff
|
78
78
|
def filename
|
79
|
-
File.join(Report::DIFFS_DIR, Digest::SHA1.hexdigest(path)
|
79
|
+
File.join(Report::DIFFS_DIR, "#{Digest::SHA1.hexdigest(path)}.html")
|
80
80
|
end
|
81
81
|
|
82
82
|
# Returns a URL to the result diff.
|
83
83
|
#
|
84
84
|
# Returns nil if the result has no diffs.
|
85
|
-
def diff_url(relative
|
85
|
+
def diff_url(relative: false)
|
86
86
|
prefix = relative ? 'files/' : '/files/'
|
87
87
|
return prefix + filename if status == STATUS_FAILURE
|
88
88
|
end
|
89
89
|
|
90
90
|
# Log the result to the terminal
|
91
|
-
def log(verbose
|
91
|
+
def log(verbose: true)
|
92
92
|
case status
|
93
93
|
when STATUS_SUCCESS
|
94
94
|
SiteDiff.log path, :success, 'UNCHANGED'
|
@@ -101,12 +101,12 @@ class SiteDiff
|
|
101
101
|
end
|
102
102
|
|
103
103
|
# Dump the result to a file
|
104
|
-
def dump(dir, relative
|
104
|
+
def dump(dir, relative: false)
|
105
105
|
dump_path = File.join(dir, filename)
|
106
106
|
base = File.dirname(dump_path)
|
107
107
|
FileUtils.mkdir_p(base) unless File.exist?(base)
|
108
108
|
File.open(dump_path, 'w') do |f|
|
109
|
-
f.write(Diff.generate_diff_output(self, relative))
|
109
|
+
f.write(Diff.generate_diff_output(self, relative:))
|
110
110
|
end
|
111
111
|
end
|
112
112
|
end
|
@@ -30,10 +30,10 @@ class SiteDiff
|
|
30
30
|
|
31
31
|
##
|
32
32
|
# TODO: Document what this method does.
|
33
|
-
def targets(node)
|
33
|
+
def targets(node, &block)
|
34
34
|
selectors = to_array(@rule['selector'])
|
35
35
|
selectors.each do |sel|
|
36
|
-
node.css(sel).each
|
36
|
+
node.css(sel).each(&block)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
data/lib/sitediff/sanitize.rb
CHANGED
@@ -62,7 +62,7 @@ class SiteDiff
|
|
62
62
|
(rules = @config[name]) || (return nil)
|
63
63
|
|
64
64
|
# Already an array? Do nothing.
|
65
|
-
if rules[0]
|
65
|
+
if rules[0].respond_to?('each') && rules[0]&.fetch('value')
|
66
66
|
# If it is a hash, put it in an array.
|
67
67
|
elsif rules['value']
|
68
68
|
rules = [rules]
|
@@ -146,7 +146,7 @@ class SiteDiff
|
|
146
146
|
def select_regions(node, regions, output)
|
147
147
|
regions = output.map do |name|
|
148
148
|
selector = get_named_region(regions, name)['selector']
|
149
|
-
region = Nokogiri::XML.fragment(
|
149
|
+
region = Nokogiri::XML.fragment("<region id=\"#{name}\"></region>").at_css('region')
|
150
150
|
matching = node.css(selector)
|
151
151
|
matching.each { |m| region.add_child m }
|
152
152
|
region
|
@@ -210,7 +210,7 @@ class SiteDiff
|
|
210
210
|
end
|
211
211
|
|
212
212
|
# Parse HTML into a node
|
213
|
-
def self.domify(str, force_doc
|
213
|
+
def self.domify(str, force_doc: false)
|
214
214
|
if force_doc || /<!DOCTYPE/.match(str[0, 512])
|
215
215
|
Nokogiri::HTML(str)
|
216
216
|
else
|
@@ -224,9 +224,9 @@ class SiteDiff
|
|
224
224
|
obj
|
225
225
|
# node or fragment
|
226
226
|
elsif Nokogiri::XML::Node == obj.class || Nokogiri::HTML::DocumentFragment == obj.class
|
227
|
-
domify(obj.to_s, true)
|
227
|
+
domify(obj.to_s, force_doc: true)
|
228
228
|
else
|
229
|
-
to_document(domify(obj, false))
|
229
|
+
to_document(domify(obj, force_doc: false))
|
230
230
|
end
|
231
231
|
end
|
232
232
|
|
data/lib/sitediff/uriwrapper.rb
CHANGED
@@ -48,7 +48,7 @@ class SiteDiff
|
|
48
48
|
|
49
49
|
##
|
50
50
|
# Creates a UriWrapper.
|
51
|
-
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug
|
51
|
+
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true)
|
52
52
|
@uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
|
53
53
|
# remove trailing '/'s from local URIs
|
54
54
|
@uri.path.gsub!(%r{/*$}, '') if local?
|
@@ -103,10 +103,9 @@ class SiteDiff
|
|
103
103
|
# Returns the encoding of an HTTP response from headers , nil if not
|
104
104
|
# specified.
|
105
105
|
def charset_encoding(http_headers)
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
end
|
106
|
+
content_type = http_headers['Content-Type']
|
107
|
+
if (md = /;\s*charset=([-\w]*)/.match(content_type))
|
108
|
+
md[1]
|
110
109
|
end
|
111
110
|
end
|
112
111
|
|
@@ -117,7 +116,7 @@ class SiteDiff
|
|
117
116
|
def typhoeus_request
|
118
117
|
params = @curl_opts.dup
|
119
118
|
# Allow basic auth
|
120
|
-
params[:userpwd] = @uri.user
|
119
|
+
params[:userpwd] = "#{@uri.user}: #{@uri.password}" if @uri.user
|
121
120
|
|
122
121
|
req = Typhoeus::Request.new(to_s, params)
|
123
122
|
|
@@ -150,19 +149,18 @@ class SiteDiff
|
|
150
149
|
|
151
150
|
req.on_failure do |resp|
|
152
151
|
if resp&.status_message
|
153
|
-
msg = resp.status_message
|
154
152
|
yield ReadResult.error(
|
155
|
-
"HTTP error when loading #{@uri}: #{
|
153
|
+
"HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
|
156
154
|
resp.response_code
|
157
155
|
)
|
158
156
|
elsif (msg = resp.options[:return_code])
|
159
157
|
yield ReadResult.error(
|
160
|
-
"Connection error when loading #{@uri}: #{msg}",
|
158
|
+
"Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{resp.status_message} #{msg}",
|
161
159
|
resp.response_code
|
162
160
|
)
|
163
161
|
else
|
164
162
|
yield ReadResult.error(
|
165
|
-
"Unknown error when loading #{@uri}: #{
|
163
|
+
"Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
|
166
164
|
resp.response_code
|
167
165
|
)
|
168
166
|
end
|
@@ -14,6 +14,7 @@ class SiteDiff
|
|
14
14
|
##
|
15
15
|
# Creates a Cache Servlet.
|
16
16
|
def initialize(_server, cache)
|
17
|
+
super
|
17
18
|
@cache = cache
|
18
19
|
end
|
19
20
|
|
@@ -41,6 +42,7 @@ class SiteDiff
|
|
41
42
|
##
|
42
43
|
# Creates a Side By Side Servlet.
|
43
44
|
def initialize(_server, cache, settings)
|
45
|
+
super
|
44
46
|
@cache = cache
|
45
47
|
@settings = settings
|
46
48
|
end
|
data/lib/sitediff/webserver.rb
CHANGED
@@ -77,15 +77,18 @@ class SiteDiff
|
|
77
77
|
BASE = 'spec/sites/ruby-doc.org'
|
78
78
|
NAMES = %w[core-1.9.3 core-2.0].freeze
|
79
79
|
|
80
|
+
# Initialize web server.
|
80
81
|
def initialize(port = PORT, base = BASE, names = NAMES)
|
81
82
|
dirs = names.map { |n| File.join(base, n) }
|
82
83
|
super(port, dirs, quiet: true)
|
83
84
|
end
|
84
85
|
|
86
|
+
# Get the before site uri.
|
85
87
|
def before
|
86
88
|
uris.first
|
87
89
|
end
|
88
90
|
|
91
|
+
# Get the after site uri.
|
89
92
|
def after
|
90
93
|
uris.last
|
91
94
|
end
|
data/lib/sitediff.rb
CHANGED
@@ -47,7 +47,7 @@ class SiteDiff
|
|
47
47
|
bg = :yellow
|
48
48
|
end
|
49
49
|
|
50
|
-
label =
|
50
|
+
label = "[#{label}]"
|
51
51
|
label = Rainbow(label)
|
52
52
|
label = label.bg(bg) if bg
|
53
53
|
label = label.fg(fg) if fg
|
@@ -76,7 +76,7 @@ class SiteDiff
|
|
76
76
|
end
|
77
77
|
|
78
78
|
# Initialize SiteDiff.
|
79
|
-
def initialize(config, cache, verbose
|
79
|
+
def initialize(config, cache, verbose: true, debug: false)
|
80
80
|
@cache = cache
|
81
81
|
@verbose = verbose
|
82
82
|
@debug = debug
|
@@ -97,7 +97,7 @@ class SiteDiff
|
|
97
97
|
end
|
98
98
|
|
99
99
|
# Sanitize HTML.
|
100
|
-
def sanitize(
|
100
|
+
def sanitize(path_passed, read_results)
|
101
101
|
%i[before after].map do |tag|
|
102
102
|
html = read_results[tag].content
|
103
103
|
# TODO: See why encoding is empty while running tests.
|
@@ -107,8 +107,8 @@ class SiteDiff
|
|
107
107
|
# during rspec tests for some reason.
|
108
108
|
encoding = read_results[tag].encoding
|
109
109
|
if encoding || html.length.positive?
|
110
|
-
section = @config.send(tag, true)
|
111
|
-
opts = { path:
|
110
|
+
section = @config.send(tag, apply_preset: true)
|
111
|
+
opts = { path: path_passed }
|
112
112
|
opts[:output] = @config.output if @config.output
|
113
113
|
Sanitizer.new(html, section, opts).sanitize
|
114
114
|
else
|
@@ -144,7 +144,7 @@ class SiteDiff
|
|
144
144
|
|
145
145
|
# Print results in order!
|
146
146
|
while (next_diff = @results[@ordered.first])
|
147
|
-
next_diff.log(@verbose)
|
147
|
+
next_diff.log(verbose: @verbose)
|
148
148
|
@ordered.shift
|
149
149
|
end
|
150
150
|
end
|
@@ -160,7 +160,7 @@ class SiteDiff
|
|
160
160
|
@ordered = @config.paths.dup
|
161
161
|
|
162
162
|
unless @cache.read_tags.empty?
|
163
|
-
SiteDiff.log(
|
163
|
+
SiteDiff.log("Using sites from cache: #{@cache.read_tags.sort.join(', ')}")
|
164
164
|
end
|
165
165
|
|
166
166
|
# TODO: Fix this after config merge refactor!
|
@@ -175,7 +175,7 @@ class SiteDiff
|
|
175
175
|
@config.setting(:interval),
|
176
176
|
@config.setting(:concurrency),
|
177
177
|
curl_opts,
|
178
|
-
@debug,
|
178
|
+
debug: @debug,
|
179
179
|
before: @config.before_url,
|
180
180
|
after: @config.after_url
|
181
181
|
)
|
@@ -203,7 +203,7 @@ class SiteDiff
|
|
203
203
|
##
|
204
204
|
# Get SiteDiff gemspec.
|
205
205
|
def self.gemspec
|
206
|
-
file = ROOT_DIR
|
206
|
+
file = "#{ROOT_DIR}/sitediff.gemspec"
|
207
207
|
Gem::Specification.load(file)
|
208
208
|
end
|
209
209
|
|
Binary file
|
Binary file
|