sitediff 1.2.0 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2e37a67bb4f0d7b93f252940d4ee0e4e3184dc27a435626f995b70bad4a7fc40
4
- data.tar.gz: 692d0a82b230e2dbab10fe8f6ee8591ca0128998f4c9acd6b97a37a90856c887
3
+ metadata.gz: d1ee18040cb7cad571450e2e17f9144bdab9965b0c52f84199a8eecb7d046e71
4
+ data.tar.gz: 7d3744b782caae37bbb7fc7789e9fd3d0ae600849dbd13c9d77f60201af56792
5
5
  SHA512:
6
- metadata.gz: 72551efe76eaa6a4a23aeacba6b25cb3bb8b4483d27299846f55ba7a7bad8e256f798de23db36f501a6739875494be808541834bfaadf587fbbd078b6fa62506
7
- data.tar.gz: 977634a139f70794aa5015e42e014e542391f5cf3c07bd34bff1f7128514051c341512d607f1cb9dfb0326c2bee0c80b2c6b4b119e21897adb5bf516fa7a3140
6
+ metadata.gz: 7c3fc26f34f94fcacee9a8d87d7cdda3ea0c9a79aa40b8ff82d8a1f19b677fdda2fee147d6b91a3a9df0c4dc80541b9ae516778abc72fb4046399c18eefafb0d
7
+ data.tar.gz: 7f615aed415a0f313badb5a32873b611200b878587d40c5d5498d06a05088bb91531d4130c7f0f64261c6e9d33a167943b00b3023c2e9ef14c524cc2dc3d9aae
data/CHANGELOG.md CHANGED
@@ -1,6 +1,24 @@
1
1
  # SiteDiff Change Log
2
2
 
3
3
  Contains noteworthy changes made to SiteDiff.
4
+ ## Version 1.2.5
5
+ - Fix issue with whitespace in URLs.
6
+
7
+ ## Version 1.2.4
8
+ - Fix issue with 'store' command.
9
+
10
+ ## Version 1.2.3
11
+ - Fix issue with nil object during diff report generation.
12
+ - Update to export documentation.
13
+
14
+ ## Version 1.2.2
15
+ - Security update for Nokogiri.
16
+ - Minor code updates.
17
+
18
+ ## Version 1.2.1
19
+ - Fixed a bug with report exporting.
20
+ - Prevents crawling the same site twice if the before and after urls are the same.
21
+ - Adding a referrer to the crawler errors.
4
22
 
5
23
  ## Version 1.2.0
6
24
  - Updated requirement to Ruby 3.1.2.
@@ -25,4 +43,4 @@ Contains noteworthy changes made to SiteDiff.
25
43
 
26
44
  ## Prior to 1.0.0
27
45
 
28
- Release notes were out of date, so only tracking changes since 1.0.0 here.
46
+ Release notes were out of date, so only tracking changes since 1.0.0 here.
data/Dockerfile CHANGED
@@ -10,10 +10,10 @@ ARG DEBIAN_FRONTEND=noninteractive
10
10
  # Our build requires rake
11
11
  # Install editors: vim, nano.
12
12
  RUN apt-get update
13
- RUN apt-get install -y apt-utils
14
- RUN apt-get install -y software-properties-common
15
- RUN apt-get install -y make pkg-config libxml2-dev libxslt-dev
16
- RUN apt-get install -y vim nano git
13
+ RUN apt-get install -y apt-utils \
14
+ software-properties-common \
15
+ make pkg-config libxml2-dev libxslt-dev \
16
+ vim nano git
17
17
 
18
18
  # Force nokogiri gem not to compile libxml2, it takes too long
19
19
  ENV NOKOGIRI_USE_SYSTEM_LIBRARIES 1
data/Gemfile.lock CHANGED
@@ -1,41 +1,39 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sitediff (1.2.0)
4
+ sitediff (1.2.4)
5
5
  addressable (>= 2.5.2, < 2.9.0)
6
6
  diffy (~> 3.4.0)
7
7
  minitar (~> 0.9)
8
- nokogiri (>= 1.13.6)
8
+ nokogiri (>= 1.14.2)
9
9
  pkg-config (~> 1.4)
10
10
  rainbow (~> 3.1.1)
11
11
  thor (~> 1.2.1)
12
- typhoeus (~> 1.4.0)
12
+ typhoeus (~> 1.4.1)
13
13
  webrick (>= 1.7)
14
14
 
15
15
  GEM
16
16
  remote: https://rubygems.org/
17
17
  specs:
18
- addressable (2.8.0)
19
- public_suffix (>= 2.0.2, < 5.0)
18
+ addressable (2.8.1)
19
+ public_suffix (>= 2.0.2, < 6.0)
20
20
  ast (2.4.2)
21
21
  diff-lcs (1.5.0)
22
22
  diffy (3.4.2)
23
- ethon (0.15.0)
24
- ffi (>= 1.15.0)
23
+ ethon (0.10.0)
24
+ ffi (>= 1.3.0)
25
25
  ffi (1.15.5)
26
26
  fileutils (1.1.0)
27
27
  json (2.6.2)
28
- mini_portile2 (2.8.0)
29
28
  minitar (0.9)
30
- nokogiri (1.13.6)
31
- mini_portile2 (~> 2.8.0)
29
+ nokogiri (1.14.2-arm64-darwin)
32
30
  racc (~> 1.4)
33
31
  parallel (1.22.1)
34
32
  parser (3.1.2.0)
35
33
  ast (~> 2.4.1)
36
- pkg-config (1.4.7)
37
- public_suffix (4.0.7)
38
- racc (1.6.0)
34
+ pkg-config (1.5.1)
35
+ public_suffix (5.0.1)
36
+ racc (1.6.2)
39
37
  rainbow (3.1.1)
40
38
  regexp_parser (2.5.0)
41
39
  rexml (3.2.5)
@@ -66,10 +64,10 @@ GEM
66
64
  parser (>= 3.1.1.0)
67
65
  ruby-progressbar (1.11.0)
68
66
  thor (1.2.1)
69
- typhoeus (1.4.0)
70
- ethon (>= 0.9.0)
67
+ typhoeus (1.4.1)
68
+ ethon (= 0.10.0)
71
69
  unicode-display_width (2.2.0)
72
- webrick (1.7.0)
70
+ webrick (1.8.1)
73
71
 
74
72
  PLATFORMS
75
73
  ruby
data/INSTALLATION.md CHANGED
@@ -36,7 +36,7 @@ avoid using `sudo` for `gem install`.
36
36
 
37
37
  ```bash
38
38
  gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true --with-xml2-include=/usr/include/libxml2
39
- gem install sitediff -v '1.2.0'
39
+ gem install sitediff -v '1.2.1'
40
40
  ```
41
41
 
42
42
  ## Docker
@@ -88,7 +88,7 @@ If possible avoid using `sudo` for `gem install`.
88
88
 
89
89
  ```bash
90
90
  gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true --with-xml2-include=/usr/include/libxml2
91
- gem install sitediff -v '1.2.0'
91
+ gem install sitediff -v '1.2.1'
92
92
  ```
93
93
 
94
94
  ## Ubuntu
data/README.md CHANGED
@@ -294,6 +294,13 @@ Generate a gzipped tar file containing the HTML report instead of generating
294
294
  and serving live web pages, this option overrides `--report-format`, forcing
295
295
  HTML.
296
296
 
297
+ ```
298
+ sitediff diff --export
299
+ sitediff diff -e
300
+ ```
301
+
302
+ This will perform the diff and export the results in a gzipped tar file.
303
+
297
304
  ### Running inside containers
298
305
 
299
306
  If you run SiteDiff inside a container or virtual machine, the URLs in its
@@ -441,7 +448,7 @@ before comparison:
441
448
  dom_transform:
442
449
  # Remove current time block
443
450
  - type: remove
444
- - selector: div#block-time
451
+ selector: div#block-time
445
452
  ```
446
453
 
447
454
  #### strip
@@ -458,7 +465,7 @@ To transform `<h1> Foo and Bar\n </h1>` to `<h1>Foo and Bar</h1>`:
458
465
  dom_transform:
459
466
  # Strip H1 tags
460
467
  - type: strip
461
- - selector: h1
468
+ selector: h1
462
469
  ```
463
470
 
464
471
  #### unwrap
@@ -655,7 +662,7 @@ EG:
655
662
  </div>
656
663
  </region>
657
664
  <region id="body">
658
- <div class=".field-name-attribution">
665
+ <div class="field-name-attribution">
659
666
  <p>Lorem ipsum...
660
667
  </div>
661
668
  </region>
data/lib/sitediff/api.rb CHANGED
@@ -159,7 +159,13 @@ class SiteDiff
159
159
  max_concurrency: @config.setting(:concurrency)
160
160
  )
161
161
  @paths = {}
162
- @config.roots.each do |tag, url|
162
+
163
+ ignore_after = @config.roots
164
+ if @config.roots['before'] == @config.roots['after']
165
+ ignore_after.delete('after')
166
+ end
167
+
168
+ ignore_after.each do |tag, url|
163
169
  Crawler.new(
164
170
  hydra,
165
171
  url,
@@ -178,6 +184,10 @@ class SiteDiff
178
184
 
179
185
  # Write paths to a file.
180
186
  @paths = @paths.values.reduce(&:|).to_a.sort
187
+ if @paths.none? | @paths.nil?
188
+ return
189
+ end
190
+
181
191
  @config.paths_file_write(@paths)
182
192
 
183
193
  # Log output.
@@ -224,7 +234,7 @@ class SiteDiff
224
234
  @config.setting(:interval),
225
235
  @config.setting(:concurrency),
226
236
  get_curl_opts(@config.settings),
227
- options[:debug],
237
+ debug: options[:debug],
228
238
  before: base)
229
239
  fetcher.run do |path, _res|
230
240
  SiteDiff.log "Visited #{path}, cached"
@@ -34,16 +34,16 @@ class SiteDiff
34
34
  @curl_opts = curl_opts
35
35
  @debug = debug
36
36
 
37
- add_uri('', depth)
37
+ add_uri('', depth, referrer: '/')
38
38
  end
39
39
 
40
40
  # Handle a newly found relative URI
41
- def add_uri(rel, depth)
41
+ def add_uri(rel, depth, referrer = '')
42
42
  return if @found.include? rel
43
43
 
44
44
  @found << rel
45
45
 
46
- wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug)
46
+ wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer:)
47
47
  wrapper.queue(@hydra) do |res|
48
48
  fetched_uri(rel, depth, res)
49
49
  end
@@ -90,12 +90,13 @@ class SiteDiff
90
90
  rels.each do |r|
91
91
  next if @found.include? r
92
92
 
93
- add_uri(r, depth - 1)
93
+ add_uri(r, depth - 1, rel)
94
94
  end
95
95
  end
96
96
 
97
97
  # Resolve a potentially-relative link. Return nil on error.
98
98
  def resolve_link(base, rel)
99
+ rel = rel.strip
99
100
  base + rel
100
101
  rescue Addressable::URI::InvalidURIError
101
102
  SiteDiff.log "skipped invalid URL: '#{rel}' (at #{base})", :warning
@@ -129,6 +130,7 @@ class SiteDiff
129
130
  u.path.start_with?(@base_uri.path)
130
131
  next unless is_sub_uri
131
132
 
133
+ # puts "Trying regex #{u.path}"
132
134
  is_included = @include_regex.nil? ? false : @include_regex.match(u.path)
133
135
  is_excluded = @exclude_regex.nil? ? false : @exclude_regex.match(u.path)
134
136
  if is_excluded && !is_included
@@ -4,6 +4,8 @@
4
4
 
5
5
  # Pages compared.
6
6
  compared_pages = results.length
7
+
8
+ url_hash = '?' + Time.now.strftime("%s%L")
7
9
  %>
8
10
  <!DOCTYPE html>
9
11
  <html>
@@ -134,17 +136,17 @@
134
136
  <div class="buttons">
135
137
  <% unless relative %>
136
138
  <% unless report['before_url_report'] === false %>
137
- <a href="<%= result.url(:before, before_url_report || before, cache) %>" class="button-before" target="_blank">Before</a>
139
+ <a href="<%= result.url(:before, before_url_report || before, cache) + url_hash %>" class="button-before" target="_blank">Before</a>
138
140
  <% end %>
139
141
  <% unless report['after_url_report'] === false %>
140
- <a href="<%= result.url(:after, after_url_report || after, cache) %>" class="button-after" target="_blank">After</a>
142
+ <a href="<%= result.url(:after, after_url_report || after, cache) + url_hash %>" class="button-after" target="_blank">After</a>
141
143
  <% end %>
142
144
  <% unless report['before_url_report'] === false || report['after_url_report'] === false %>
143
- <a href="/sidebyside<%= result.path %>" class="button-both">Both</a>
145
+ <a href="/sidebyside<%= result.path + url_hash %>" class="button-both">Both</a>
144
146
  <% end %>
145
147
  <% end %>
146
148
  <% unless result.diff_url.nil? %>
147
- <a href="<%= result.diff_url(relative: relative) %>" class="button button-diff">View diff</a>
149
+ <a href="<%= result.diff_url(relative: relative) + url_hash %>" class="button button-diff">View diff</a>
148
150
  <% end %>
149
151
  </div>
150
152
  </td>
@@ -29,7 +29,7 @@ sanitization:
29
29
  pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
30
30
  substitute: '\1'
31
31
  - title: Strip domain names from absolute URLs
32
- pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
32
+ pattern: 'https?:\/\/[a-zA-Z0-9.:-]+'
33
33
  substitute: '__domain__'
34
34
  - title: Strip form build ID
35
35
  selector: input
@@ -61,3 +61,7 @@ sanitization:
61
61
  selector: script
62
62
  pattern: 'js_[-\w]{40,43}\\?\.js'
63
63
  substitute: 'js__ID__.js'
64
+ dom_transform:
65
+ # Strip Drupal.settings (>8.0)
66
+ - type: remove
67
+ selector: 'script[data-drupal-selector="drupal-settings-json"]'
@@ -147,8 +147,8 @@ class SiteDiff
147
147
  temp_path.mkpath
148
148
  report_path = temp_path + REPORT_DIR
149
149
  report_path.mkpath
150
- files_path = "#{report_path}files"
151
- files_path.mkpath
150
+ files_path = "#{report_path}/files"
151
+ FileUtils.mkpath(files_path)
152
152
  diffs_path = dir + DIFFS_DIR
153
153
 
154
154
  # Move files to place.
@@ -220,10 +220,10 @@ class SiteDiff
220
220
 
221
221
  # Force this object to be a document, so we can apply a stylesheet
222
222
  def self.to_document(obj)
223
- if Nokogiri::XML::Document == obj.class || Nokogiri::HTML::Document == obj.class
223
+ if obj.instance_of?(Nokogiri::XML::Document) || obj.instance_of?(Nokogiri::HTML::Document)
224
224
  obj
225
225
  # node or fragment
226
- elsif Nokogiri::XML::Node == obj.class || Nokogiri::HTML::DocumentFragment == obj.class
226
+ elsif obj.instance_of?(Nokogiri::XML::Node) || obj.instance_of?(Nokogiri::HTML::DocumentFragment)
227
227
  domify(obj.to_s, force_doc: true)
228
228
  else
229
229
  to_document(domify(obj, force_doc: false))
@@ -48,12 +48,13 @@ class SiteDiff
48
48
 
49
49
  ##
50
50
  # Creates a UriWrapper.
51
- def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true)
51
+ def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true, referrer: '')
52
52
  @uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
53
53
  # remove trailing '/'s from local URIs
54
54
  @uri.path.gsub!(%r{/*$}, '') if local?
55
55
  @curl_opts = curl_opts
56
56
  @debug = debug
57
+ @referrer = referrer
57
58
  end
58
59
 
59
60
  ##
@@ -118,6 +119,12 @@ class SiteDiff
118
119
  # Allow basic auth
119
120
  params[:userpwd] = "#{@uri.user}: #{@uri.password}" if @uri.user
120
121
 
122
+ # params['verbose'] = true
123
+ # params['ssl_verifypeer'] = false
124
+ # params['ssl_verifyhost'] = 0
125
+ # params['followlocation'] = true
126
+ # puts to_s
127
+
121
128
  req = Typhoeus::Request.new(to_s, params)
122
129
 
123
130
  req.on_success do |resp|
@@ -136,13 +143,13 @@ class SiteDiff
136
143
  raise if @debug
137
144
 
138
145
  yield ReadResult.error(
139
- "Parsing error for #{@uri}: #{e.message}"
146
+ "Parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
140
147
  )
141
148
  rescue StandardError => e
142
149
  raise if @debug
143
150
 
144
151
  yield ReadResult.error(
145
- "Unknown parsing error for #{@uri}: #{e.message}"
152
+ "Unknown parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
146
153
  )
147
154
  end
148
155
  end
@@ -150,17 +157,17 @@ class SiteDiff
150
157
  req.on_failure do |resp|
151
158
  if resp&.status_message
152
159
  yield ReadResult.error(
153
- "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
160
+ "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From: #{@referrer}",
154
161
  resp.response_code
155
162
  )
156
163
  elsif (msg = resp.options[:return_code])
157
164
  yield ReadResult.error(
158
- "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{resp.status_message} #{msg}",
165
+ "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From: #{@referrer}",
159
166
  resp.response_code
160
167
  )
161
168
  else
162
169
  yield ReadResult.error(
163
- "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
170
+ "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From: #{@referrer}",
164
171
  resp.response_code
165
172
  )
166
173
  end
data/lib/sitediff.rb CHANGED
@@ -137,7 +137,7 @@ class SiteDiff
137
137
  rescue StandardError => e
138
138
  raise if @debug
139
139
 
140
- Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
140
+ diff = Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e.message}")
141
141
  end
142
142
  end
143
143
  @results[path] = diff
data/package-lock.json CHANGED
@@ -588,9 +588,9 @@
588
588
  "dev": true
589
589
  },
590
590
  "minimatch": {
591
- "version": "3.0.4",
592
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
593
- "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
591
+ "version": "3.1.2",
592
+ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
593
+ "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
594
594
  "dev": true,
595
595
  "requires": {
596
596
  "brace-expansion": "^1.1.7"
data/sitediff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'sitediff'
5
- s.version = '1.2.0'
5
+ s.version = '1.2.4'
6
6
  s.required_ruby_version = '>= 3.1.2'
7
7
  s.summary = 'Compare two versions of a site with ease!'
8
8
  s.description = <<DESC
@@ -35,14 +35,14 @@ DESC
35
35
 
36
36
  s.add_dependency 'minitar', '~> 0.9'
37
37
  s.add_dependency 'thor', '~> 1.2.1'
38
- s.add_dependency 'typhoeus', '~> 1.4.0'
38
+ s.add_dependency 'typhoeus', '~> 1.4.1'
39
39
 
40
40
  # A bug in rubygems can break rainbow 2.2
41
41
  # https://github.com/bundler/bundler/issues/5357
42
42
  s.add_dependency 'rainbow', '~> 3.1.1'
43
43
 
44
44
  # Nokogiri 1.7 is not supported on Ruby 2.0.
45
- s.add_dependency 'nokogiri', '>= 1.13.6'
45
+ s.add_dependency 'nokogiri', '>= 1.14.2'
46
46
 
47
47
  # Diffy and addressable have a max version for Ruby 1.9.
48
48
  s.add_dependency 'addressable', '>= 2.5.2', '< 2.9.0'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dergachev
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-08-29 00:00:00.000000000 Z
13
+ date: 2023-05-14 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: pkg-config
@@ -60,14 +60,14 @@ dependencies:
60
60
  requirements:
61
61
  - - "~>"
62
62
  - !ruby/object:Gem::Version
63
- version: 1.4.0
63
+ version: 1.4.1
64
64
  type: :runtime
65
65
  prerelease: false
66
66
  version_requirements: !ruby/object:Gem::Requirement
67
67
  requirements:
68
68
  - - "~>"
69
69
  - !ruby/object:Gem::Version
70
- version: 1.4.0
70
+ version: 1.4.1
71
71
  - !ruby/object:Gem::Dependency
72
72
  name: rainbow
73
73
  requirement: !ruby/object:Gem::Requirement
@@ -88,14 +88,14 @@ dependencies:
88
88
  requirements:
89
89
  - - ">="
90
90
  - !ruby/object:Gem::Version
91
- version: 1.13.6
91
+ version: 1.14.2
92
92
  type: :runtime
93
93
  prerelease: false
94
94
  version_requirements: !ruby/object:Gem::Requirement
95
95
  requirements:
96
96
  - - ">="
97
97
  - !ruby/object:Gem::Version
98
- version: 1.13.6
98
+ version: 1.14.2
99
99
  - !ruby/object:Gem::Dependency
100
100
  name: addressable
101
101
  requirement: !ruby/object:Gem::Requirement
@@ -229,7 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
229
229
  - !ruby/object:Gem::Version
230
230
  version: '0'
231
231
  requirements: []
232
- rubygems_version: 3.3.7
232
+ rubygems_version: 3.4.8
233
233
  signing_key:
234
234
  specification_version: 4
235
235
  summary: Compare two versions of a site with ease!