sitediff 1.2.0 → 1.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2e37a67bb4f0d7b93f252940d4ee0e4e3184dc27a435626f995b70bad4a7fc40
4
- data.tar.gz: 692d0a82b230e2dbab10fe8f6ee8591ca0128998f4c9acd6b97a37a90856c887
3
+ metadata.gz: d1ee18040cb7cad571450e2e17f9144bdab9965b0c52f84199a8eecb7d046e71
4
+ data.tar.gz: 7d3744b782caae37bbb7fc7789e9fd3d0ae600849dbd13c9d77f60201af56792
5
5
  SHA512:
6
- metadata.gz: 72551efe76eaa6a4a23aeacba6b25cb3bb8b4483d27299846f55ba7a7bad8e256f798de23db36f501a6739875494be808541834bfaadf587fbbd078b6fa62506
7
- data.tar.gz: 977634a139f70794aa5015e42e014e542391f5cf3c07bd34bff1f7128514051c341512d607f1cb9dfb0326c2bee0c80b2c6b4b119e21897adb5bf516fa7a3140
6
+ metadata.gz: 7c3fc26f34f94fcacee9a8d87d7cdda3ea0c9a79aa40b8ff82d8a1f19b677fdda2fee147d6b91a3a9df0c4dc80541b9ae516778abc72fb4046399c18eefafb0d
7
+ data.tar.gz: 7f615aed415a0f313badb5a32873b611200b878587d40c5d5498d06a05088bb91531d4130c7f0f64261c6e9d33a167943b00b3023c2e9ef14c524cc2dc3d9aae
data/CHANGELOG.md CHANGED
@@ -1,6 +1,24 @@
1
1
  # SiteDiff Change Log
2
2
 
3
3
  Contains noteworthy changes made to SiteDiff.
4
+ ## Version 1.2.5
5
+ - Fix issue with whitespace in URLs.
6
+
7
+ ## Version 1.2.4
8
+ - Fix issue with 'store' command.
9
+
10
+ ## Version 1.2.3
11
+ - Fix issue with nil object during diff report generation.
12
+ - Update to export documentation.
13
+
14
+ ## Version 1.2.2
15
+ - Security update for Nokogiri.
16
+ - Minor code updates.
17
+
18
+ ## Version 1.2.1
19
+ - Fixed a bug with report exporting.
20
+ - Prevented crawling the same site twice when the before and after URLs are the same.
21
+ - Added the referrer to crawler error messages.
4
22
 
5
23
  ## Version 1.2.0
6
24
  - Updated requirement to Ruby 3.1.2.
@@ -25,4 +43,4 @@ Contains noteworthy changes made to SiteDiff.
25
43
 
26
44
  ## Prior to 1.0.0
27
45
 
28
- Release notes were out of date, so only tracking changes since 1.0.0 here.
46
+ Release notes were out of date, so only tracking changes since 1.0.0 here.
data/Dockerfile CHANGED
@@ -10,10 +10,10 @@ ARG DEBIAN_FRONTEND=noninteractive
10
10
  # Our build requires rake
11
11
  # Install editors: vim, nano.
12
12
  RUN apt-get update
13
- RUN apt-get install -y apt-utils
14
- RUN apt-get install -y software-properties-common
15
- RUN apt-get install -y make pkg-config libxml2-dev libxslt-dev
16
- RUN apt-get install -y vim nano git
13
+ RUN apt-get install -y apt-utils \
14
+ software-properties-common \
15
+ make pkg-config libxml2-dev libxslt-dev \
16
+ vim nano git
17
17
 
18
18
  # Force nokogiri gem not to compile libxml2, it takes too long
19
19
  ENV NOKOGIRI_USE_SYSTEM_LIBRARIES 1
data/Gemfile.lock CHANGED
@@ -1,41 +1,39 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sitediff (1.2.0)
4
+ sitediff (1.2.4)
5
5
  addressable (>= 2.5.2, < 2.9.0)
6
6
  diffy (~> 3.4.0)
7
7
  minitar (~> 0.9)
8
- nokogiri (>= 1.13.6)
8
+ nokogiri (>= 1.14.2)
9
9
  pkg-config (~> 1.4)
10
10
  rainbow (~> 3.1.1)
11
11
  thor (~> 1.2.1)
12
- typhoeus (~> 1.4.0)
12
+ typhoeus (~> 1.4.1)
13
13
  webrick (>= 1.7)
14
14
 
15
15
  GEM
16
16
  remote: https://rubygems.org/
17
17
  specs:
18
- addressable (2.8.0)
19
- public_suffix (>= 2.0.2, < 5.0)
18
+ addressable (2.8.1)
19
+ public_suffix (>= 2.0.2, < 6.0)
20
20
  ast (2.4.2)
21
21
  diff-lcs (1.5.0)
22
22
  diffy (3.4.2)
23
- ethon (0.15.0)
24
- ffi (>= 1.15.0)
23
+ ethon (0.10.0)
24
+ ffi (>= 1.3.0)
25
25
  ffi (1.15.5)
26
26
  fileutils (1.1.0)
27
27
  json (2.6.2)
28
- mini_portile2 (2.8.0)
29
28
  minitar (0.9)
30
- nokogiri (1.13.6)
31
- mini_portile2 (~> 2.8.0)
29
+ nokogiri (1.14.2-arm64-darwin)
32
30
  racc (~> 1.4)
33
31
  parallel (1.22.1)
34
32
  parser (3.1.2.0)
35
33
  ast (~> 2.4.1)
36
- pkg-config (1.4.7)
37
- public_suffix (4.0.7)
38
- racc (1.6.0)
34
+ pkg-config (1.5.1)
35
+ public_suffix (5.0.1)
36
+ racc (1.6.2)
39
37
  rainbow (3.1.1)
40
38
  regexp_parser (2.5.0)
41
39
  rexml (3.2.5)
@@ -66,10 +64,10 @@ GEM
66
64
  parser (>= 3.1.1.0)
67
65
  ruby-progressbar (1.11.0)
68
66
  thor (1.2.1)
69
- typhoeus (1.4.0)
70
- ethon (>= 0.9.0)
67
+ typhoeus (1.4.1)
68
+ ethon (= 0.10.0)
71
69
  unicode-display_width (2.2.0)
72
- webrick (1.7.0)
70
+ webrick (1.8.1)
73
71
 
74
72
  PLATFORMS
75
73
  ruby
data/INSTALLATION.md CHANGED
@@ -36,7 +36,7 @@ avoid using `sudo` for `gem install`.
36
36
 
37
37
  ```bash
38
38
  gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true --with-xml2-include=/usr/include/libxml2
39
- gem install sitediff -v '1.2.0'
39
+ gem install sitediff -v '1.2.1'
40
40
  ```
41
41
 
42
42
  ## Docker
@@ -88,7 +88,7 @@ If possible avoid using `sudo` for `gem install`.
88
88
 
89
89
  ```bash
90
90
  gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true --with-xml2-include=/usr/include/libxml2
91
- gem install sitediff -v '1.2.0'
91
+ gem install sitediff -v '1.2.1'
92
92
  ```
93
93
 
94
94
  ## Ubuntu
data/README.md CHANGED
@@ -294,6 +294,13 @@ Generate a gzipped tar file containing the HTML report instead of generating
294
294
  and serving live web pages, this option overrides `--report-format`, forcing
295
295
  HTML.
296
296
 
297
+ ```
298
+ sitediff diff --export
299
+ sitediff diff -e
300
+ ```
301
+
302
+ This will perform the diff and export the results in a gzipped tar file.
303
+
297
304
  ### Running inside containers
298
305
 
299
306
  If you run SiteDiff inside a container or virtual machine, the URLs in its
@@ -441,7 +448,7 @@ before comparison:
441
448
  dom_transform:
442
449
  # Remove current time block
443
450
  - type: remove
444
- - selector: div#block-time
451
+ selector: div#block-time
445
452
  ```
446
453
 
447
454
  #### strip
@@ -458,7 +465,7 @@ To transform `<h1> Foo and Bar\n </h1>` to `<h1>Foo and Bar</h1>`:
458
465
  dom_transform:
459
466
  # Strip H1 tags
460
467
  - type: strip
461
- - selector: h1
468
+ selector: h1
462
469
  ```
463
470
 
464
471
  #### unwrap
@@ -655,7 +662,7 @@ EG:
655
662
  </div>
656
663
  </region>
657
664
  <region id="body">
658
- <div class=".field-name-attribution">
665
+ <div class="field-name-attribution">
659
666
  <p>Lorem ipsum...
660
667
  </div>
661
668
  </region>
data/lib/sitediff/api.rb CHANGED
@@ -159,7 +159,13 @@ class SiteDiff
159
159
  max_concurrency: @config.setting(:concurrency)
160
160
  )
161
161
  @paths = {}
162
- @config.roots.each do |tag, url|
162
+
163
+ ignore_after = @config.roots
164
+ if @config.roots['before'] == @config.roots['after']
165
+ ignore_after.delete('after')
166
+ end
167
+
168
+ ignore_after.each do |tag, url|
163
169
  Crawler.new(
164
170
  hydra,
165
171
  url,
@@ -178,6 +184,10 @@ class SiteDiff
178
184
 
179
185
  # Write paths to a file.
180
186
  @paths = @paths.values.reduce(&:|).to_a.sort
187
+ if @paths.none? | @paths.nil?
188
+ return
189
+ end
190
+
181
191
  @config.paths_file_write(@paths)
182
192
 
183
193
  # Log output.
@@ -224,7 +234,7 @@ class SiteDiff
224
234
  @config.setting(:interval),
225
235
  @config.setting(:concurrency),
226
236
  get_curl_opts(@config.settings),
227
- options[:debug],
237
+ debug: options[:debug],
228
238
  before: base)
229
239
  fetcher.run do |path, _res|
230
240
  SiteDiff.log "Visited #{path}, cached"
@@ -34,16 +34,16 @@ class SiteDiff
34
34
  @curl_opts = curl_opts
35
35
  @debug = debug
36
36
 
37
- add_uri('', depth)
37
+ add_uri('', depth, referrer: '/')
38
38
  end
39
39
 
40
40
  # Handle a newly found relative URI
41
- def add_uri(rel, depth)
41
+ def add_uri(rel, depth, referrer = '')
42
42
  return if @found.include? rel
43
43
 
44
44
  @found << rel
45
45
 
46
- wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug)
46
+ wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer:)
47
47
  wrapper.queue(@hydra) do |res|
48
48
  fetched_uri(rel, depth, res)
49
49
  end
@@ -90,12 +90,13 @@ class SiteDiff
90
90
  rels.each do |r|
91
91
  next if @found.include? r
92
92
 
93
- add_uri(r, depth - 1)
93
+ add_uri(r, depth - 1, rel)
94
94
  end
95
95
  end
96
96
 
97
97
  # Resolve a potentially-relative link. Return nil on error.
98
98
  def resolve_link(base, rel)
99
+ rel = rel.strip
99
100
  base + rel
100
101
  rescue Addressable::URI::InvalidURIError
101
102
  SiteDiff.log "skipped invalid URL: '#{rel}' (at #{base})", :warning
@@ -129,6 +130,7 @@ class SiteDiff
129
130
  u.path.start_with?(@base_uri.path)
130
131
  next unless is_sub_uri
131
132
 
133
+ # puts "Trying regex #{u.path}"
132
134
  is_included = @include_regex.nil? ? false : @include_regex.match(u.path)
133
135
  is_excluded = @exclude_regex.nil? ? false : @exclude_regex.match(u.path)
134
136
  if is_excluded && !is_included
@@ -4,6 +4,8 @@
4
4
 
5
5
  # Pages compared.
6
6
  compared_pages = results.length
7
+
8
+ url_hash = '?' + Time.now.strftime("%s%L")
7
9
  %>
8
10
  <!DOCTYPE html>
9
11
  <html>
@@ -134,17 +136,17 @@
134
136
  <div class="buttons">
135
137
  <% unless relative %>
136
138
  <% unless report['before_url_report'] === false %>
137
- <a href="<%= result.url(:before, before_url_report || before, cache) %>" class="button-before" target="_blank">Before</a>
139
+ <a href="<%= result.url(:before, before_url_report || before, cache) + url_hash %>" class="button-before" target="_blank">Before</a>
138
140
  <% end %>
139
141
  <% unless report['after_url_report'] === false %>
140
- <a href="<%= result.url(:after, after_url_report || after, cache) %>" class="button-after" target="_blank">After</a>
142
+ <a href="<%= result.url(:after, after_url_report || after, cache) + url_hash %>" class="button-after" target="_blank">After</a>
141
143
  <% end %>
142
144
  <% unless report['before_url_report'] === false || report['after_url_report'] === false %>
143
- <a href="/sidebyside<%= result.path %>" class="button-both">Both</a>
145
+ <a href="/sidebyside<%= result.path + url_hash %>" class="button-both">Both</a>
144
146
  <% end %>
145
147
  <% end %>
146
148
  <% unless result.diff_url.nil? %>
147
- <a href="<%= result.diff_url(relative: relative) %>" class="button button-diff">View diff</a>
149
+ <a href="<%= result.diff_url(relative: relative) + url_hash %>" class="button button-diff">View diff</a>
148
150
  <% end %>
149
151
  </div>
150
152
  </td>
@@ -29,7 +29,7 @@ sanitization:
29
29
  pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
30
30
  substitute: '\1'
31
31
  - title: Strip domain names from absolute URLs
32
- pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
32
+ pattern: 'https?:\/\/[a-zA-Z0-9.:-]+'
33
33
  substitute: '__domain__'
34
34
  - title: Strip form build ID
35
35
  selector: input
@@ -61,3 +61,7 @@ sanitization:
61
61
  selector: script
62
62
  pattern: 'js_[-\w]{40,43}\\?\.js'
63
63
  substitute: 'js__ID__.js'
64
+ dom_transform:
65
+ # Strip Drupal.settings (>8.0)
66
+ - type: remove
67
+ selector: 'script[data-drupal-selector="drupal-settings-json"]'
@@ -147,8 +147,8 @@ class SiteDiff
147
147
  temp_path.mkpath
148
148
  report_path = temp_path + REPORT_DIR
149
149
  report_path.mkpath
150
- files_path = "#{report_path}files"
151
- files_path.mkpath
150
+ files_path = "#{report_path}/files"
151
+ FileUtils.mkpath(files_path)
152
152
  diffs_path = dir + DIFFS_DIR
153
153
 
154
154
  # Move files to place.
@@ -220,10 +220,10 @@ class SiteDiff
220
220
 
221
221
  # Force this object to be a document, so we can apply a stylesheet
222
222
  def self.to_document(obj)
223
- if Nokogiri::XML::Document == obj.class || Nokogiri::HTML::Document == obj.class
223
+ if obj.instance_of?(Nokogiri::XML::Document) || obj.instance_of?(Nokogiri::HTML::Document)
224
224
  obj
225
225
  # node or fragment
226
- elsif Nokogiri::XML::Node == obj.class || Nokogiri::HTML::DocumentFragment == obj.class
226
+ elsif obj.instance_of?(Nokogiri::XML::Node) || obj.instance_of?(Nokogiri::HTML::DocumentFragment)
227
227
  domify(obj.to_s, force_doc: true)
228
228
  else
229
229
  to_document(domify(obj, force_doc: false))
@@ -48,12 +48,13 @@ class SiteDiff
48
48
 
49
49
  ##
50
50
  # Creates a UriWrapper.
51
- def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true)
51
+ def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true, referrer: '')
52
52
  @uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
53
53
  # remove trailing '/'s from local URIs
54
54
  @uri.path.gsub!(%r{/*$}, '') if local?
55
55
  @curl_opts = curl_opts
56
56
  @debug = debug
57
+ @referrer = referrer
57
58
  end
58
59
 
59
60
  ##
@@ -118,6 +119,12 @@ class SiteDiff
118
119
  # Allow basic auth
119
120
  params[:userpwd] = "#{@uri.user}: #{@uri.password}" if @uri.user
120
121
 
122
+ # params['verbose'] = true
123
+ # params['ssl_verifypeer'] = false
124
+ # params['ssl_verifyhost'] = 0
125
+ # params['followlocation'] = true
126
+ # puts to_s
127
+
121
128
  req = Typhoeus::Request.new(to_s, params)
122
129
 
123
130
  req.on_success do |resp|
@@ -136,13 +143,13 @@ class SiteDiff
136
143
  raise if @debug
137
144
 
138
145
  yield ReadResult.error(
139
- "Parsing error for #{@uri}: #{e.message}"
146
+ "Parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
140
147
  )
141
148
  rescue StandardError => e
142
149
  raise if @debug
143
150
 
144
151
  yield ReadResult.error(
145
- "Unknown parsing error for #{@uri}: #{e.message}"
152
+ "Unknown parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
146
153
  )
147
154
  end
148
155
  end
@@ -150,17 +157,17 @@ class SiteDiff
150
157
  req.on_failure do |resp|
151
158
  if resp&.status_message
152
159
  yield ReadResult.error(
153
- "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
160
+ "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From: #{@referrer}",
154
161
  resp.response_code
155
162
  )
156
163
  elsif (msg = resp.options[:return_code])
157
164
  yield ReadResult.error(
158
- "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{resp.status_message} #{msg}",
165
+ "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From: #{@referrer}",
159
166
  resp.response_code
160
167
  )
161
168
  else
162
169
  yield ReadResult.error(
163
- "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
170
+ "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From: #{@referrer}",
164
171
  resp.response_code
165
172
  )
166
173
  end
data/lib/sitediff.rb CHANGED
@@ -137,7 +137,7 @@ class SiteDiff
137
137
  rescue StandardError => e
138
138
  raise if @debug
139
139
 
140
- Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
140
+ diff = Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e.message}")
141
141
  end
142
142
  end
143
143
  @results[path] = diff
data/package-lock.json CHANGED
@@ -588,9 +588,9 @@
588
588
  "dev": true
589
589
  },
590
590
  "minimatch": {
591
- "version": "3.0.4",
592
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
593
- "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
591
+ "version": "3.1.2",
592
+ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
593
+ "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
594
594
  "dev": true,
595
595
  "requires": {
596
596
  "brace-expansion": "^1.1.7"
data/sitediff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'sitediff'
5
- s.version = '1.2.0'
5
+ s.version = '1.2.4'
6
6
  s.required_ruby_version = '>= 3.1.2'
7
7
  s.summary = 'Compare two versions of a site with ease!'
8
8
  s.description = <<DESC
@@ -35,14 +35,14 @@ DESC
35
35
 
36
36
  s.add_dependency 'minitar', '~> 0.9'
37
37
  s.add_dependency 'thor', '~> 1.2.1'
38
- s.add_dependency 'typhoeus', '~> 1.4.0'
38
+ s.add_dependency 'typhoeus', '~> 1.4.1'
39
39
 
40
40
  # A bug in rubygems can break rainbow 2.2
41
41
  # https://github.com/bundler/bundler/issues/5357
42
42
  s.add_dependency 'rainbow', '~> 3.1.1'
43
43
 
44
44
  # Nokogiri 1.7 is not supported on Ruby 2.0.
45
- s.add_dependency 'nokogiri', '>= 1.13.6'
45
+ s.add_dependency 'nokogiri', '>= 1.14.2'
46
46
 
47
47
  # Diffy and addressable have a max version for Ruby 1.9.
48
48
  s.add_dependency 'addressable', '>= 2.5.2', '< 2.9.0'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dergachev
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-08-29 00:00:00.000000000 Z
13
+ date: 2023-05-14 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: pkg-config
@@ -60,14 +60,14 @@ dependencies:
60
60
  requirements:
61
61
  - - "~>"
62
62
  - !ruby/object:Gem::Version
63
- version: 1.4.0
63
+ version: 1.4.1
64
64
  type: :runtime
65
65
  prerelease: false
66
66
  version_requirements: !ruby/object:Gem::Requirement
67
67
  requirements:
68
68
  - - "~>"
69
69
  - !ruby/object:Gem::Version
70
- version: 1.4.0
70
+ version: 1.4.1
71
71
  - !ruby/object:Gem::Dependency
72
72
  name: rainbow
73
73
  requirement: !ruby/object:Gem::Requirement
@@ -88,14 +88,14 @@ dependencies:
88
88
  requirements:
89
89
  - - ">="
90
90
  - !ruby/object:Gem::Version
91
- version: 1.13.6
91
+ version: 1.14.2
92
92
  type: :runtime
93
93
  prerelease: false
94
94
  version_requirements: !ruby/object:Gem::Requirement
95
95
  requirements:
96
96
  - - ">="
97
97
  - !ruby/object:Gem::Version
98
- version: 1.13.6
98
+ version: 1.14.2
99
99
  - !ruby/object:Gem::Dependency
100
100
  name: addressable
101
101
  requirement: !ruby/object:Gem::Requirement
@@ -229,7 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
229
229
  - !ruby/object:Gem::Version
230
230
  version: '0'
231
231
  requirements: []
232
- rubygems_version: 3.3.7
232
+ rubygems_version: 3.4.8
233
233
  signing_key:
234
234
  specification_version: 4
235
235
  summary: Compare two versions of a site with ease!