sitediff 1.1.1 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.eslintignore +1 -0
- data/.eslintrc.json +28 -0
- data/.project +11 -0
- data/.rubocop.yml +179 -0
- data/.rubocop_todo.yml +51 -0
- data/CHANGELOG.md +33 -0
- data/Dockerfile +33 -0
- data/Gemfile +11 -0
- data/Gemfile.lock +85 -0
- data/INSTALLATION.md +146 -0
- data/LICENSE +339 -0
- data/README.md +810 -0
- data/Rakefile +12 -0
- data/Thorfile +135 -0
- data/config/.gitkeep +0 -0
- data/config/sanitize_domains.example.yaml +8 -0
- data/config/sitediff.example.yaml +81 -0
- data/docker-compose.test.yml +3 -0
- data/lib/sitediff/api.rb +24 -7
- data/lib/sitediff/cache.rb +5 -3
- data/lib/sitediff/cli.rb +4 -3
- data/lib/sitediff/config/creator.rb +13 -13
- data/lib/sitediff/config/preset.rb +6 -6
- data/lib/sitediff/config.rb +9 -9
- data/lib/sitediff/crawler.rb +15 -5
- data/lib/sitediff/diff.rb +1 -1
- data/lib/sitediff/fetch.rb +2 -2
- data/lib/sitediff/files/report.html.erb +1 -1
- data/lib/sitediff/presets/drupal.yaml +63 -0
- data/lib/sitediff/report.rb +6 -6
- data/lib/sitediff/result.rb +5 -5
- data/lib/sitediff/sanitize/dom_transform.rb +2 -2
- data/lib/sitediff/sanitize/regexp.rb +2 -2
- data/lib/sitediff/sanitize.rb +5 -5
- data/lib/sitediff/uriwrapper.rb +12 -13
- data/lib/sitediff/webserver/resultserver.rb +2 -0
- data/lib/sitediff/webserver.rb +3 -0
- data/lib/sitediff.rb +9 -9
- data/misc/sitediff - overview report.png +0 -0
- data/misc/sitediff - page report.png +0 -0
- data/package-lock.json +878 -0
- data/package.json +25 -0
- data/sitediff.gemspec +51 -0
- metadata +62 -18
data/lib/sitediff/report.rb
CHANGED
@@ -96,7 +96,7 @@ class SiteDiff
|
|
96
96
|
if @config.export
|
97
97
|
package_report(dir)
|
98
98
|
else
|
99
|
-
SiteDiff.log
|
99
|
+
SiteDiff.log "Report generated to #{report_file.expand_path}"
|
100
100
|
end
|
101
101
|
end
|
102
102
|
|
@@ -135,7 +135,7 @@ class SiteDiff
|
|
135
135
|
|
136
136
|
write_settings dir
|
137
137
|
|
138
|
-
SiteDiff.log
|
138
|
+
SiteDiff.log "Report generated to #{report_file.expand_path}"
|
139
139
|
end
|
140
140
|
|
141
141
|
##
|
@@ -147,7 +147,7 @@ class SiteDiff
|
|
147
147
|
temp_path.mkpath
|
148
148
|
report_path = temp_path + REPORT_DIR
|
149
149
|
report_path.mkpath
|
150
|
-
files_path = report_path +
|
150
|
+
files_path = report_path + "files"
|
151
151
|
files_path.mkpath
|
152
152
|
diffs_path = dir + DIFFS_DIR
|
153
153
|
|
@@ -164,7 +164,7 @@ class SiteDiff
|
|
164
164
|
end
|
165
165
|
FileUtils.move(temp_path + REPORT_FILE_TAR, dir)
|
166
166
|
temp_path.rmtree
|
167
|
-
SiteDiff.log
|
167
|
+
SiteDiff.log "Archived report generated to #{dir.join(REPORT_FILE_TAR)}"
|
168
168
|
end
|
169
169
|
|
170
170
|
##
|
@@ -182,7 +182,7 @@ class SiteDiff
|
|
182
182
|
diff_dir.rmtree if diff_dir.exist?
|
183
183
|
|
184
184
|
# Write diffs to the diff directory.
|
185
|
-
@results.each { |r| r.dump(dir, @config.export) if r.status == Result::STATUS_FAILURE }
|
185
|
+
@results.each { |r| r.dump(dir, relative: @config.export) if r.status == Result::STATUS_FAILURE }
|
186
186
|
SiteDiff.log "All diff files written to #{diff_dir.expand_path}" unless @config.export
|
187
187
|
end
|
188
188
|
|
@@ -245,7 +245,7 @@ class SiteDiff
|
|
245
245
|
if File.exist? timestamp_file
|
246
246
|
file = File::Stat.new(timestamp_file)
|
247
247
|
time = file.mtime
|
248
|
-
time.
|
248
|
+
time.instance_of?(Time) ? time.strftime('%Y-%m-%d %H:%M') : ''
|
249
249
|
else
|
250
250
|
'unknown'
|
251
251
|
end
|
data/lib/sitediff/result.rb
CHANGED
@@ -76,19 +76,19 @@ class SiteDiff
|
|
76
76
|
|
77
77
|
# Filename to store diff
|
78
78
|
def filename
|
79
|
-
File.join(Report::DIFFS_DIR, Digest::SHA1.hexdigest(path)
|
79
|
+
File.join(Report::DIFFS_DIR, "#{Digest::SHA1.hexdigest(path)}.html")
|
80
80
|
end
|
81
81
|
|
82
82
|
# Returns a URL to the result diff.
|
83
83
|
#
|
84
84
|
# Returns nil if the result has no diffs.
|
85
|
-
def diff_url(relative
|
85
|
+
def diff_url(relative: false)
|
86
86
|
prefix = relative ? 'files/' : '/files/'
|
87
87
|
return prefix + filename if status == STATUS_FAILURE
|
88
88
|
end
|
89
89
|
|
90
90
|
# Log the result to the terminal
|
91
|
-
def log(verbose
|
91
|
+
def log(verbose: true)
|
92
92
|
case status
|
93
93
|
when STATUS_SUCCESS
|
94
94
|
SiteDiff.log path, :success, 'UNCHANGED'
|
@@ -101,12 +101,12 @@ class SiteDiff
|
|
101
101
|
end
|
102
102
|
|
103
103
|
# Dump the result to a file
|
104
|
-
def dump(dir, relative
|
104
|
+
def dump(dir, relative: false)
|
105
105
|
dump_path = File.join(dir, filename)
|
106
106
|
base = File.dirname(dump_path)
|
107
107
|
FileUtils.mkdir_p(base) unless File.exist?(base)
|
108
108
|
File.open(dump_path, 'w') do |f|
|
109
|
-
f.write(Diff.generate_diff_output(self, relative))
|
109
|
+
f.write(Diff.generate_diff_output(self, relative:))
|
110
110
|
end
|
111
111
|
end
|
112
112
|
end
|
@@ -30,10 +30,10 @@ class SiteDiff
|
|
30
30
|
|
31
31
|
##
|
32
32
|
# TODO: Document what this method does.
|
33
|
-
def targets(node)
|
33
|
+
def targets(node, &block)
|
34
34
|
selectors = to_array(@rule['selector'])
|
35
35
|
selectors.each do |sel|
|
36
|
-
node.css(sel).each
|
36
|
+
node.css(sel).each(&block)
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
data/lib/sitediff/sanitize.rb
CHANGED
@@ -62,7 +62,7 @@ class SiteDiff
|
|
62
62
|
(rules = @config[name]) || (return nil)
|
63
63
|
|
64
64
|
# Already an array? Do nothing.
|
65
|
-
if rules[0]
|
65
|
+
if rules[0].respond_to?('each') && rules[0]&.fetch('value')
|
66
66
|
# If it is a hash, put it in an array.
|
67
67
|
elsif rules['value']
|
68
68
|
rules = [rules]
|
@@ -146,7 +146,7 @@ class SiteDiff
|
|
146
146
|
def select_regions(node, regions, output)
|
147
147
|
regions = output.map do |name|
|
148
148
|
selector = get_named_region(regions, name)['selector']
|
149
|
-
region = Nokogiri::XML.fragment(
|
149
|
+
region = Nokogiri::XML.fragment("<region id=\"#{name}\"></region>").at_css('region')
|
150
150
|
matching = node.css(selector)
|
151
151
|
matching.each { |m| region.add_child m }
|
152
152
|
region
|
@@ -210,7 +210,7 @@ class SiteDiff
|
|
210
210
|
end
|
211
211
|
|
212
212
|
# Parse HTML into a node
|
213
|
-
def self.domify(str, force_doc
|
213
|
+
def self.domify(str, force_doc: false)
|
214
214
|
if force_doc || /<!DOCTYPE/.match(str[0, 512])
|
215
215
|
Nokogiri::HTML(str)
|
216
216
|
else
|
@@ -224,9 +224,9 @@ class SiteDiff
|
|
224
224
|
obj
|
225
225
|
# node or fragment
|
226
226
|
elsif Nokogiri::XML::Node == obj.class || Nokogiri::HTML::DocumentFragment == obj.class
|
227
|
-
domify(obj.to_s, true)
|
227
|
+
domify(obj.to_s, force_doc: true)
|
228
228
|
else
|
229
|
-
to_document(domify(obj, false))
|
229
|
+
to_document(domify(obj, force_doc: false))
|
230
230
|
end
|
231
231
|
end
|
232
232
|
|
data/lib/sitediff/uriwrapper.rb
CHANGED
@@ -48,12 +48,13 @@ class SiteDiff
|
|
48
48
|
|
49
49
|
##
|
50
50
|
# Creates a UriWrapper.
|
51
|
-
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug
|
51
|
+
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true, referrer: '')
|
52
52
|
@uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
|
53
53
|
# remove trailing '/'s from local URIs
|
54
54
|
@uri.path.gsub!(%r{/*$}, '') if local?
|
55
55
|
@curl_opts = curl_opts
|
56
56
|
@debug = debug
|
57
|
+
@referrer = referrer
|
57
58
|
end
|
58
59
|
|
59
60
|
##
|
@@ -103,10 +104,9 @@ class SiteDiff
|
|
103
104
|
# Returns the encoding of an HTTP response from headers , nil if not
|
104
105
|
# specified.
|
105
106
|
def charset_encoding(http_headers)
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
end
|
107
|
+
content_type = http_headers['Content-Type']
|
108
|
+
if (md = /;\s*charset=([-\w]*)/.match(content_type))
|
109
|
+
md[1]
|
110
110
|
end
|
111
111
|
end
|
112
112
|
|
@@ -117,7 +117,7 @@ class SiteDiff
|
|
117
117
|
def typhoeus_request
|
118
118
|
params = @curl_opts.dup
|
119
119
|
# Allow basic auth
|
120
|
-
params[:userpwd] = @uri.user
|
120
|
+
params[:userpwd] = "#{@uri.user}: #{@uri.password}" if @uri.user
|
121
121
|
|
122
122
|
req = Typhoeus::Request.new(to_s, params)
|
123
123
|
|
@@ -137,32 +137,31 @@ class SiteDiff
|
|
137
137
|
raise if @debug
|
138
138
|
|
139
139
|
yield ReadResult.error(
|
140
|
-
"Parsing error for #{@uri}: #{e.message}"
|
140
|
+
"Parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
|
141
141
|
)
|
142
142
|
rescue StandardError => e
|
143
143
|
raise if @debug
|
144
144
|
|
145
145
|
yield ReadResult.error(
|
146
|
-
"Unknown parsing error for #{@uri}: #{e.message}"
|
146
|
+
"Unknown parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
|
147
147
|
)
|
148
148
|
end
|
149
149
|
end
|
150
150
|
|
151
|
-
req.on_failure do |resp|
|
151
|
+
req.on_failure do |resp|
|
152
152
|
if resp&.status_message
|
153
|
-
msg = resp.status_message
|
154
153
|
yield ReadResult.error(
|
155
|
-
"HTTP error when loading #{@uri}: #{
|
154
|
+
"HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
|
156
155
|
resp.response_code
|
157
156
|
)
|
158
157
|
elsif (msg = resp.options[:return_code])
|
159
158
|
yield ReadResult.error(
|
160
|
-
"Connection error when loading #{@uri}: #{msg}",
|
159
|
+
"Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From page: #{@referrer}",
|
161
160
|
resp.response_code
|
162
161
|
)
|
163
162
|
else
|
164
163
|
yield ReadResult.error(
|
165
|
-
"Unknown error when loading #{@uri}: #{
|
164
|
+
"Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
|
166
165
|
resp.response_code
|
167
166
|
)
|
168
167
|
end
|
@@ -14,6 +14,7 @@ class SiteDiff
|
|
14
14
|
##
|
15
15
|
# Creates a Cache Servlet.
|
16
16
|
def initialize(_server, cache)
|
17
|
+
super
|
17
18
|
@cache = cache
|
18
19
|
end
|
19
20
|
|
@@ -41,6 +42,7 @@ class SiteDiff
|
|
41
42
|
##
|
42
43
|
# Creates a Side By Side Servlet.
|
43
44
|
def initialize(_server, cache, settings)
|
45
|
+
super
|
44
46
|
@cache = cache
|
45
47
|
@settings = settings
|
46
48
|
end
|
data/lib/sitediff/webserver.rb
CHANGED
@@ -77,15 +77,18 @@ class SiteDiff
|
|
77
77
|
BASE = 'spec/sites/ruby-doc.org'
|
78
78
|
NAMES = %w[core-1.9.3 core-2.0].freeze
|
79
79
|
|
80
|
+
# Initialize web server.
|
80
81
|
def initialize(port = PORT, base = BASE, names = NAMES)
|
81
82
|
dirs = names.map { |n| File.join(base, n) }
|
82
83
|
super(port, dirs, quiet: true)
|
83
84
|
end
|
84
85
|
|
86
|
+
# Get the before site uri.
|
85
87
|
def before
|
86
88
|
uris.first
|
87
89
|
end
|
88
90
|
|
91
|
+
# Get the after site uri.
|
89
92
|
def after
|
90
93
|
uris.last
|
91
94
|
end
|
data/lib/sitediff.rb
CHANGED
@@ -47,7 +47,7 @@ class SiteDiff
|
|
47
47
|
bg = :yellow
|
48
48
|
end
|
49
49
|
|
50
|
-
label =
|
50
|
+
label = "[#{label}]"
|
51
51
|
label = Rainbow(label)
|
52
52
|
label = label.bg(bg) if bg
|
53
53
|
label = label.fg(fg) if fg
|
@@ -76,7 +76,7 @@ class SiteDiff
|
|
76
76
|
end
|
77
77
|
|
78
78
|
# Initialize SiteDiff.
|
79
|
-
def initialize(config, cache, verbose
|
79
|
+
def initialize(config, cache, verbose: true, debug: false)
|
80
80
|
@cache = cache
|
81
81
|
@verbose = verbose
|
82
82
|
@debug = debug
|
@@ -97,7 +97,7 @@ class SiteDiff
|
|
97
97
|
end
|
98
98
|
|
99
99
|
# Sanitize HTML.
|
100
|
-
def sanitize(
|
100
|
+
def sanitize(path_passed, read_results)
|
101
101
|
%i[before after].map do |tag|
|
102
102
|
html = read_results[tag].content
|
103
103
|
# TODO: See why encoding is empty while running tests.
|
@@ -107,8 +107,8 @@ class SiteDiff
|
|
107
107
|
# during rspec tests for some reason.
|
108
108
|
encoding = read_results[tag].encoding
|
109
109
|
if encoding || html.length.positive?
|
110
|
-
section = @config.send(tag, true)
|
111
|
-
opts = { path:
|
110
|
+
section = @config.send(tag, apply_preset: true)
|
111
|
+
opts = { path: path_passed }
|
112
112
|
opts[:output] = @config.output if @config.output
|
113
113
|
Sanitizer.new(html, section, opts).sanitize
|
114
114
|
else
|
@@ -144,7 +144,7 @@ class SiteDiff
|
|
144
144
|
|
145
145
|
# Print results in order!
|
146
146
|
while (next_diff = @results[@ordered.first])
|
147
|
-
next_diff.log(@verbose)
|
147
|
+
next_diff.log(verbose: @verbose)
|
148
148
|
@ordered.shift
|
149
149
|
end
|
150
150
|
end
|
@@ -160,7 +160,7 @@ class SiteDiff
|
|
160
160
|
@ordered = @config.paths.dup
|
161
161
|
|
162
162
|
unless @cache.read_tags.empty?
|
163
|
-
SiteDiff.log(
|
163
|
+
SiteDiff.log("Using sites from cache: #{@cache.read_tags.sort.join(', ')}")
|
164
164
|
end
|
165
165
|
|
166
166
|
# TODO: Fix this after config merge refactor!
|
@@ -175,7 +175,7 @@ class SiteDiff
|
|
175
175
|
@config.setting(:interval),
|
176
176
|
@config.setting(:concurrency),
|
177
177
|
curl_opts,
|
178
|
-
@debug,
|
178
|
+
debug: @debug,
|
179
179
|
before: @config.before_url,
|
180
180
|
after: @config.after_url
|
181
181
|
)
|
@@ -203,7 +203,7 @@ class SiteDiff
|
|
203
203
|
##
|
204
204
|
# Get SiteDiff gemspec.
|
205
205
|
def self.gemspec
|
206
|
-
file = ROOT_DIR
|
206
|
+
file = "#{ROOT_DIR}/sitediff.gemspec"
|
207
207
|
Gem::Specification.load(file)
|
208
208
|
end
|
209
209
|
|
Binary file
|
Binary file
|