sitediff 1.2.1 → 1.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -1
- data/Dockerfile +4 -4
- data/Gemfile.lock +14 -16
- data/README.md +10 -3
- data/lib/sitediff/api.rb +9 -5
- data/lib/sitediff/crawler.rb +3 -1
- data/lib/sitediff/files/report.html.erb +6 -4
- data/lib/sitediff/presets/drupal.yaml +5 -1
- data/lib/sitediff/report.rb +2 -2
- data/lib/sitediff/sanitize.rb +2 -2
- data/lib/sitediff/uriwrapper.rb +10 -4
- data/lib/sitediff.rb +1 -1
- data/package-lock.json +3 -3
- data/sitediff.gemspec +3 -3
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1ee18040cb7cad571450e2e17f9144bdab9965b0c52f84199a8eecb7d046e71
|
4
|
+
data.tar.gz: 7d3744b782caae37bbb7fc7789e9fd3d0ae600849dbd13c9d77f60201af56792
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7c3fc26f34f94fcacee9a8d87d7cdda3ea0c9a79aa40b8ff82d8a1f19b677fdda2fee147d6b91a3a9df0c4dc80541b9ae516778abc72fb4046399c18eefafb0d
|
7
|
+
data.tar.gz: 7f615aed415a0f313badb5a32873b611200b878587d40c5d5498d06a05088bb91531d4130c7f0f64261c6e9d33a167943b00b3023c2e9ef14c524cc2dc3d9aae
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,19 @@
|
|
1
1
|
# SiteDiff Change Log
|
2
2
|
|
3
3
|
Contains noteworthy changes made to SiteDiff.
|
4
|
+
## Version 1.2.5
|
5
|
+
- Fix issue with whitespace in URLs.
|
6
|
+
|
7
|
+
## Version 1.2.4
|
8
|
+
- Fix issue with 'store' command.
|
9
|
+
|
10
|
+
## Version 1.2.3
|
11
|
+
- Fix issue with nil object during diff report generation.
|
12
|
+
- Update to export documentation.
|
13
|
+
|
14
|
+
## Version 1.2.2
|
15
|
+
- Security update for Nokogiri.
|
16
|
+
- Minor code updates.
|
4
17
|
|
5
18
|
## Version 1.2.1
|
6
19
|
- Fixed a bug with report exporting.
|
@@ -30,4 +43,4 @@ Contains noteworthy changes made to SiteDiff.
|
|
30
43
|
|
31
44
|
## Prior to 1.0.0
|
32
45
|
|
33
|
-
Release notes were out of date, so only tracking changes since 1.0.0 here.
|
46
|
+
Release notes were out of date, so only tracking changes since 1.0.0 here.
|
data/Dockerfile
CHANGED
@@ -10,10 +10,10 @@ ARG DEBIAN_FRONTEND=noninteractive
|
|
10
10
|
# Our build requires rake
|
11
11
|
# Install editors: vim, nano.
|
12
12
|
RUN apt-get update
|
13
|
-
RUN apt-get install -y apt-utils
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
RUN apt-get install -y apt-utils \
|
14
|
+
software-properties-common \
|
15
|
+
make pkg-config libxml2-dev libxslt-dev \
|
16
|
+
vim nano git
|
17
17
|
|
18
18
|
# Force nokogiri gem not to compile libxml2, it takes too long
|
19
19
|
ENV NOKOGIRI_USE_SYSTEM_LIBRARIES 1
|
data/Gemfile.lock
CHANGED
@@ -1,41 +1,39 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
sitediff (1.2.1)
|
4
|
+
sitediff (1.2.4)
|
5
5
|
addressable (>= 2.5.2, < 2.9.0)
|
6
6
|
diffy (~> 3.4.0)
|
7
7
|
minitar (~> 0.9)
|
8
|
-
nokogiri (>= 1.
|
8
|
+
nokogiri (>= 1.14.2)
|
9
9
|
pkg-config (~> 1.4)
|
10
10
|
rainbow (~> 3.1.1)
|
11
11
|
thor (~> 1.2.1)
|
12
|
-
typhoeus (~> 1.4.
|
12
|
+
typhoeus (~> 1.4.1)
|
13
13
|
webrick (>= 1.7)
|
14
14
|
|
15
15
|
GEM
|
16
16
|
remote: https://rubygems.org/
|
17
17
|
specs:
|
18
|
-
addressable (2.8.
|
19
|
-
public_suffix (>= 2.0.2, <
|
18
|
+
addressable (2.8.1)
|
19
|
+
public_suffix (>= 2.0.2, < 6.0)
|
20
20
|
ast (2.4.2)
|
21
21
|
diff-lcs (1.5.0)
|
22
22
|
diffy (3.4.2)
|
23
|
-
ethon (0.
|
24
|
-
ffi (>= 1.
|
23
|
+
ethon (0.10.0)
|
24
|
+
ffi (>= 1.3.0)
|
25
25
|
ffi (1.15.5)
|
26
26
|
fileutils (1.1.0)
|
27
27
|
json (2.6.2)
|
28
|
-
mini_portile2 (2.8.0)
|
29
28
|
minitar (0.9)
|
30
|
-
nokogiri (1.
|
31
|
-
mini_portile2 (~> 2.8.0)
|
29
|
+
nokogiri (1.14.2-arm64-darwin)
|
32
30
|
racc (~> 1.4)
|
33
31
|
parallel (1.22.1)
|
34
32
|
parser (3.1.2.0)
|
35
33
|
ast (~> 2.4.1)
|
36
|
-
pkg-config (1.
|
37
|
-
public_suffix (
|
38
|
-
racc (1.6.
|
34
|
+
pkg-config (1.5.1)
|
35
|
+
public_suffix (5.0.1)
|
36
|
+
racc (1.6.2)
|
39
37
|
rainbow (3.1.1)
|
40
38
|
regexp_parser (2.5.0)
|
41
39
|
rexml (3.2.5)
|
@@ -66,10 +64,10 @@ GEM
|
|
66
64
|
parser (>= 3.1.1.0)
|
67
65
|
ruby-progressbar (1.11.0)
|
68
66
|
thor (1.2.1)
|
69
|
-
typhoeus (1.4.
|
70
|
-
ethon (
|
67
|
+
typhoeus (1.4.1)
|
68
|
+
ethon (= 0.10.0)
|
71
69
|
unicode-display_width (2.2.0)
|
72
|
-
webrick (1.
|
70
|
+
webrick (1.8.1)
|
73
71
|
|
74
72
|
PLATFORMS
|
75
73
|
ruby
|
data/README.md
CHANGED
@@ -294,6 +294,13 @@ Generate a gzipped tar file containing the HTML report instead of generating
|
|
294
294
|
and serving live web pages, this option overrides `--report-format`, forcing
|
295
295
|
HTML.
|
296
296
|
|
297
|
+
```
|
298
|
+
sitediff diff --export
|
299
|
+
sitediff diff -e
|
300
|
+
```
|
301
|
+
|
302
|
+
This will perform the diff and export the results in a gzipped tar file.
|
303
|
+
|
297
304
|
### Running inside containers
|
298
305
|
|
299
306
|
If you run SiteDiff inside a container or virtual machine, the URLs in its
|
@@ -441,7 +448,7 @@ before comparison:
|
|
441
448
|
dom_transform:
|
442
449
|
# Remove current time block
|
443
450
|
- type: remove
|
444
|
-
|
451
|
+
selector: div#block-time
|
445
452
|
```
|
446
453
|
|
447
454
|
#### strip
|
@@ -458,7 +465,7 @@ To transform `<h1> Foo and Bar\n </h1>` to `<h1>Foo and Bar<\h1>`:
|
|
458
465
|
dom_transform:
|
459
466
|
# Strip H1 tags
|
460
467
|
- type: strip
|
461
|
-
|
468
|
+
selector: h1
|
462
469
|
```
|
463
470
|
|
464
471
|
#### unwrap
|
@@ -655,7 +662,7 @@ EG:
|
|
655
662
|
</div>
|
656
663
|
</region>
|
657
664
|
<region id="body">
|
658
|
-
<div class="
|
665
|
+
<div class="field-name-attribution">
|
659
666
|
<p>Lorem ipsum...
|
660
667
|
</div>
|
661
668
|
</region>
|
data/lib/sitediff/api.rb
CHANGED
@@ -160,12 +160,12 @@ class SiteDiff
|
|
160
160
|
)
|
161
161
|
@paths = {}
|
162
162
|
|
163
|
-
|
163
|
+
ignore_after = @config.roots
|
164
164
|
if @config.roots['before'] == @config.roots['after']
|
165
|
-
|
165
|
+
ignore_after.delete('after')
|
166
166
|
end
|
167
|
-
|
168
|
-
|
167
|
+
|
168
|
+
ignore_after.each do |tag, url|
|
169
169
|
Crawler.new(
|
170
170
|
hydra,
|
171
171
|
url,
|
@@ -184,6 +184,10 @@ class SiteDiff
|
|
184
184
|
|
185
185
|
# Write paths to a file.
|
186
186
|
@paths = @paths.values.reduce(&:|).to_a.sort
|
187
|
+
if @paths.none? | @paths.nil?
|
188
|
+
return
|
189
|
+
end
|
190
|
+
|
187
191
|
@config.paths_file_write(@paths)
|
188
192
|
|
189
193
|
# Log output.
|
@@ -230,7 +234,7 @@ class SiteDiff
|
|
230
234
|
@config.setting(:interval),
|
231
235
|
@config.setting(:concurrency),
|
232
236
|
get_curl_opts(@config.settings),
|
233
|
-
options[:debug],
|
237
|
+
debug: options[:debug],
|
234
238
|
before: base)
|
235
239
|
fetcher.run do |path, _res|
|
236
240
|
SiteDiff.log "Visited #{path}, cached"
|
data/lib/sitediff/crawler.rb
CHANGED
@@ -43,7 +43,7 @@ class SiteDiff
|
|
43
43
|
|
44
44
|
@found << rel
|
45
45
|
|
46
|
-
wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer:
|
46
|
+
wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer:)
|
47
47
|
wrapper.queue(@hydra) do |res|
|
48
48
|
fetched_uri(rel, depth, res)
|
49
49
|
end
|
@@ -96,6 +96,7 @@ class SiteDiff
|
|
96
96
|
|
97
97
|
# Resolve a potentially-relative link. Return nil on error.
|
98
98
|
def resolve_link(base, rel)
|
99
|
+
rel = rel.strip
|
99
100
|
base + rel
|
100
101
|
rescue Addressable::URI::InvalidURIError
|
101
102
|
SiteDiff.log "skipped invalid URL: '#{rel}' (at #{base})", :warning
|
@@ -129,6 +130,7 @@ class SiteDiff
|
|
129
130
|
u.path.start_with?(@base_uri.path)
|
130
131
|
next unless is_sub_uri
|
131
132
|
|
133
|
+
# puts "Trying regex #{u.path}"
|
132
134
|
is_included = @include_regex.nil? ? false : @include_regex.match(u.path)
|
133
135
|
is_excluded = @exclude_regex.nil? ? false : @exclude_regex.match(u.path)
|
134
136
|
if is_excluded && !is_included
|
@@ -4,6 +4,8 @@
|
|
4
4
|
|
5
5
|
# Pages compared.
|
6
6
|
compared_pages = results.length
|
7
|
+
|
8
|
+
url_hash = '?' + Time.now.strftime("%s%L")
|
7
9
|
%>
|
8
10
|
<!DOCTYPE html>
|
9
11
|
<html>
|
@@ -134,17 +136,17 @@
|
|
134
136
|
<div class="buttons">
|
135
137
|
<% unless relative %>
|
136
138
|
<% unless report['before_url_report'] === false %>
|
137
|
-
<a href="<%= result.url(:before, before_url_report || before, cache) %>" class="button-before" target="_blank">Before</a>
|
139
|
+
<a href="<%= result.url(:before, before_url_report || before, cache) + url_hash %>" class="button-before" target="_blank">Before</a>
|
138
140
|
<% end %>
|
139
141
|
<% unless report['after_url_report'] === false %>
|
140
|
-
<a href="<%= result.url(:after, after_url_report || after, cache) %>" class="button-after" target="_blank">After</a>
|
142
|
+
<a href="<%= result.url(:after, after_url_report || after, cache) + url_hash %>" class="button-after" target="_blank">After</a>
|
141
143
|
<% end %>
|
142
144
|
<% unless report['before_url_report'] === false || report['after_url_report'] === false %>
|
143
|
-
<a href="/sidebyside<%= result.path %>" class="button-both">Both</a>
|
145
|
+
<a href="/sidebyside<%= result.path + url_hash %>" class="button-both">Both</a>
|
144
146
|
<% end %>
|
145
147
|
<% end %>
|
146
148
|
<% unless result.diff_url.nil? %>
|
147
|
-
<a href="<%= result.diff_url(relative: relative) %>" class="button button-diff">View diff</a>
|
149
|
+
<a href="<%= result.diff_url(relative: relative) + url_hash %>" class="button button-diff">View diff</a>
|
148
150
|
<% end %>
|
149
151
|
</div>
|
150
152
|
</td>
|
@@ -29,7 +29,7 @@ sanitization:
|
|
29
29
|
pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
|
30
30
|
substitute: '\1'
|
31
31
|
- title: Strip domain names from absolute URLs
|
32
|
-
pattern: '
|
32
|
+
pattern: 'https?:\/\/[a-zA-Z0-9.:-]+'
|
33
33
|
substitute: '__domain__'
|
34
34
|
- title: Strip form build ID
|
35
35
|
selector: input
|
@@ -61,3 +61,7 @@ sanitization:
|
|
61
61
|
selector: script
|
62
62
|
pattern: 'js_[-\w]{40,43}\\?\.js'
|
63
63
|
substitute: 'js__ID__.js'
|
64
|
+
dom_transform:
|
65
|
+
# Strip Drupal.settings (>8.0)
|
66
|
+
- type: remove
|
67
|
+
selector: 'script[data-drupal-selector="drupal-settings-json"]'
|
data/lib/sitediff/report.rb
CHANGED
@@ -147,8 +147,8 @@ class SiteDiff
|
|
147
147
|
temp_path.mkpath
|
148
148
|
report_path = temp_path + REPORT_DIR
|
149
149
|
report_path.mkpath
|
150
|
-
files_path = report_path
|
151
|
-
|
150
|
+
files_path = "#{report_path}/files"
|
151
|
+
FileUtils.mkpath(files_path)
|
152
152
|
diffs_path = dir + DIFFS_DIR
|
153
153
|
|
154
154
|
# Move files to place.
|
data/lib/sitediff/sanitize.rb
CHANGED
@@ -220,10 +220,10 @@ class SiteDiff
|
|
220
220
|
|
221
221
|
# Force this object to be a document, so we can apply a stylesheet
|
222
222
|
def self.to_document(obj)
|
223
|
-
if Nokogiri::XML::Document
|
223
|
+
if obj.instance_of?(Nokogiri::XML::Document) || obj.instance_of?(Nokogiri::HTML::Document)
|
224
224
|
obj
|
225
225
|
# node or fragment
|
226
|
-
elsif Nokogiri::XML::Node
|
226
|
+
elsif obj.instance_of?(Nokogiri::XML::Node) || obj.instance_of?(Nokogiri::HTML::DocumentFragment)
|
227
227
|
domify(obj.to_s, force_doc: true)
|
228
228
|
else
|
229
229
|
to_document(domify(obj, force_doc: false))
|
data/lib/sitediff/uriwrapper.rb
CHANGED
@@ -119,6 +119,12 @@ class SiteDiff
|
|
119
119
|
# Allow basic auth
|
120
120
|
params[:userpwd] = "#{@uri.user}: #{@uri.password}" if @uri.user
|
121
121
|
|
122
|
+
# params['verbose'] = true
|
123
|
+
# params['ssl_verifypeer'] = false
|
124
|
+
# params['ssl_verifyhost'] = 0
|
125
|
+
# params['followlocation'] = true
|
126
|
+
# puts to_s
|
127
|
+
|
122
128
|
req = Typhoeus::Request.new(to_s, params)
|
123
129
|
|
124
130
|
req.on_success do |resp|
|
@@ -148,20 +154,20 @@ class SiteDiff
|
|
148
154
|
end
|
149
155
|
end
|
150
156
|
|
151
|
-
req.on_failure do |resp|
|
157
|
+
req.on_failure do |resp|
|
152
158
|
if resp&.status_message
|
153
159
|
yield ReadResult.error(
|
154
|
-
"HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From
|
160
|
+
"HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From: #{@referrer}",
|
155
161
|
resp.response_code
|
156
162
|
)
|
157
163
|
elsif (msg = resp.options[:return_code])
|
158
164
|
yield ReadResult.error(
|
159
|
-
"Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From
|
165
|
+
"Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From: #{@referrer}",
|
160
166
|
resp.response_code
|
161
167
|
)
|
162
168
|
else
|
163
169
|
yield ReadResult.error(
|
164
|
-
"Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}
|
170
|
+
"Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From: #{@referrer}",
|
165
171
|
resp.response_code
|
166
172
|
)
|
167
173
|
end
|
data/lib/sitediff.rb
CHANGED
@@ -137,7 +137,7 @@ class SiteDiff
|
|
137
137
|
rescue StandardError => e
|
138
138
|
raise if @debug
|
139
139
|
|
140
|
-
Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
|
140
|
+
diff = Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e.message}")
|
141
141
|
end
|
142
142
|
end
|
143
143
|
@results[path] = diff
|
data/package-lock.json
CHANGED
@@ -588,9 +588,9 @@
|
|
588
588
|
"dev": true
|
589
589
|
},
|
590
590
|
"minimatch": {
|
591
|
-
"version": "3.
|
592
|
-
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.
|
593
|
-
"integrity": "sha512-
|
591
|
+
"version": "3.1.2",
|
592
|
+
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
|
593
|
+
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
|
594
594
|
"dev": true,
|
595
595
|
"requires": {
|
596
596
|
"brace-expansion": "^1.1.7"
|
data/sitediff.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'sitediff'
|
5
|
-
s.version = '1.2.1'
|
5
|
+
s.version = '1.2.4'
|
6
6
|
s.required_ruby_version = '>= 3.1.2'
|
7
7
|
s.summary = 'Compare two versions of a site with ease!'
|
8
8
|
s.description = <<DESC
|
@@ -35,14 +35,14 @@ DESC
|
|
35
35
|
|
36
36
|
s.add_dependency 'minitar', '~> 0.9'
|
37
37
|
s.add_dependency 'thor', '~> 1.2.1'
|
38
|
-
s.add_dependency 'typhoeus', '~> 1.4.
|
38
|
+
s.add_dependency 'typhoeus', '~> 1.4.1'
|
39
39
|
|
40
40
|
# A bug in rubygems can break rainbow 2.2
|
41
41
|
# https://github.com/bundler/bundler/issues/5357
|
42
42
|
s.add_dependency 'rainbow', '~> 3.1.1'
|
43
43
|
|
44
44
|
# Nokogiri 1.7 is not supported on Ruby 2.0.
|
45
|
-
s.add_dependency 'nokogiri', '>= 1.
|
45
|
+
s.add_dependency 'nokogiri', '>= 1.14.2'
|
46
46
|
|
47
47
|
# Diffy and addressable have a max version for Ruby 1.9.
|
48
48
|
s.add_dependency 'addressable', '>= 2.5.2', '< 2.9.0'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitediff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alex Dergachev
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2023-05-14 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: pkg-config
|
@@ -60,14 +60,14 @@ dependencies:
|
|
60
60
|
requirements:
|
61
61
|
- - "~>"
|
62
62
|
- !ruby/object:Gem::Version
|
63
|
-
version: 1.4.
|
63
|
+
version: 1.4.1
|
64
64
|
type: :runtime
|
65
65
|
prerelease: false
|
66
66
|
version_requirements: !ruby/object:Gem::Requirement
|
67
67
|
requirements:
|
68
68
|
- - "~>"
|
69
69
|
- !ruby/object:Gem::Version
|
70
|
-
version: 1.4.
|
70
|
+
version: 1.4.1
|
71
71
|
- !ruby/object:Gem::Dependency
|
72
72
|
name: rainbow
|
73
73
|
requirement: !ruby/object:Gem::Requirement
|
@@ -88,14 +88,14 @@ dependencies:
|
|
88
88
|
requirements:
|
89
89
|
- - ">="
|
90
90
|
- !ruby/object:Gem::Version
|
91
|
-
version: 1.
|
91
|
+
version: 1.14.2
|
92
92
|
type: :runtime
|
93
93
|
prerelease: false
|
94
94
|
version_requirements: !ruby/object:Gem::Requirement
|
95
95
|
requirements:
|
96
96
|
- - ">="
|
97
97
|
- !ruby/object:Gem::Version
|
98
|
-
version: 1.
|
98
|
+
version: 1.14.2
|
99
99
|
- !ruby/object:Gem::Dependency
|
100
100
|
name: addressable
|
101
101
|
requirement: !ruby/object:Gem::Requirement
|
@@ -229,7 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
229
229
|
- !ruby/object:Gem::Version
|
230
230
|
version: '0'
|
231
231
|
requirements: []
|
232
|
-
rubygems_version: 3.
|
232
|
+
rubygems_version: 3.4.8
|
233
233
|
signing_key:
|
234
234
|
specification_version: 4
|
235
235
|
summary: Compare two versions of a site with ease!
|