sitediff 1.2.1 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dff12d889984ec88ad662c2a0f2f3e0771b2a2c6cbc8e5f0442773ab36a51e7c
4
- data.tar.gz: 96541e827d456c925677821c501297b68b284828584f35e499b4b35da75f962f
3
+ metadata.gz: d1ee18040cb7cad571450e2e17f9144bdab9965b0c52f84199a8eecb7d046e71
4
+ data.tar.gz: 7d3744b782caae37bbb7fc7789e9fd3d0ae600849dbd13c9d77f60201af56792
5
5
  SHA512:
6
- metadata.gz: 3044d99f7494697d817f4ab545308987dbcaebd007f531f9113c2298f3f1550952967f92f0223a96782efed424b4c2ca97123fb0fce20a382b955086afef3386
7
- data.tar.gz: 7715c7285734dad676fe95cc4fc4b6cd69411a073a4c3bca9b839ff152dfa427b0b6655159e54fc7a6e82c6ac3c0c5c9c86f312c9c7d287fc5101ec6cff0d23b
6
+ metadata.gz: 7c3fc26f34f94fcacee9a8d87d7cdda3ea0c9a79aa40b8ff82d8a1f19b677fdda2fee147d6b91a3a9df0c4dc80541b9ae516778abc72fb4046399c18eefafb0d
7
+ data.tar.gz: 7f615aed415a0f313badb5a32873b611200b878587d40c5d5498d06a05088bb91531d4130c7f0f64261c6e9d33a167943b00b3023c2e9ef14c524cc2dc3d9aae
data/CHANGELOG.md CHANGED
@@ -1,6 +1,19 @@
1
1
  # SiteDiff Change Log
2
2
 
3
3
  Contains noteworthy changes made to SiteDiff.
4
+ ## Version 1.2.5
5
+ - Fix issue with whitespace in URLs.
6
+
7
+ ## Version 1.2.4
8
+ - Fix issue with 'store' command.
9
+
10
+ ## Version 1.2.3
11
+ - Fix issue with nil object during diff report generation.
12
+ - Update to export documentation.
13
+
14
+ ## Version 1.2.2
15
+ - Security update for Nokogiri.
16
+ - Minor code updates.
4
17
 
5
18
  ## Version 1.2.1
6
19
  - Fixed a bug with report exporting.
@@ -30,4 +43,4 @@ Contains noteworthy changes made to SiteDiff.
30
43
 
31
44
  ## Prior to 1.0.0
32
45
 
33
- Release notes were out of date, so only tracking changes since 1.0.0 here.
46
+ Release notes were out of date, so only tracking changes since 1.0.0 here.
data/Dockerfile CHANGED
@@ -10,10 +10,10 @@ ARG DEBIAN_FRONTEND=noninteractive
10
10
  # Our build requires rake
11
11
  # Install editors: vim, nano.
12
12
  RUN apt-get update
13
- RUN apt-get install -y apt-utils
14
- RUN apt-get install -y software-properties-common
15
- RUN apt-get install -y make pkg-config libxml2-dev libxslt-dev
16
- RUN apt-get install -y vim nano git
13
+ RUN apt-get install -y apt-utils \
14
+ software-properties-common \
15
+ make pkg-config libxml2-dev libxslt-dev \
16
+ vim nano git
17
17
 
18
18
  # Force nokogiri gem not to compile libxml2, it takes too long
19
19
  ENV NOKOGIRI_USE_SYSTEM_LIBRARIES 1
data/Gemfile.lock CHANGED
@@ -1,41 +1,39 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sitediff (1.2.1)
4
+ sitediff (1.2.4)
5
5
  addressable (>= 2.5.2, < 2.9.0)
6
6
  diffy (~> 3.4.0)
7
7
  minitar (~> 0.9)
8
- nokogiri (>= 1.13.6)
8
+ nokogiri (>= 1.14.2)
9
9
  pkg-config (~> 1.4)
10
10
  rainbow (~> 3.1.1)
11
11
  thor (~> 1.2.1)
12
- typhoeus (~> 1.4.0)
12
+ typhoeus (~> 1.4.1)
13
13
  webrick (>= 1.7)
14
14
 
15
15
  GEM
16
16
  remote: https://rubygems.org/
17
17
  specs:
18
- addressable (2.8.0)
19
- public_suffix (>= 2.0.2, < 5.0)
18
+ addressable (2.8.1)
19
+ public_suffix (>= 2.0.2, < 6.0)
20
20
  ast (2.4.2)
21
21
  diff-lcs (1.5.0)
22
22
  diffy (3.4.2)
23
- ethon (0.15.0)
24
- ffi (>= 1.15.0)
23
+ ethon (0.10.0)
24
+ ffi (>= 1.3.0)
25
25
  ffi (1.15.5)
26
26
  fileutils (1.1.0)
27
27
  json (2.6.2)
28
- mini_portile2 (2.8.0)
29
28
  minitar (0.9)
30
- nokogiri (1.13.6)
31
- mini_portile2 (~> 2.8.0)
29
+ nokogiri (1.14.2-arm64-darwin)
32
30
  racc (~> 1.4)
33
31
  parallel (1.22.1)
34
32
  parser (3.1.2.0)
35
33
  ast (~> 2.4.1)
36
- pkg-config (1.4.7)
37
- public_suffix (4.0.7)
38
- racc (1.6.0)
34
+ pkg-config (1.5.1)
35
+ public_suffix (5.0.1)
36
+ racc (1.6.2)
39
37
  rainbow (3.1.1)
40
38
  regexp_parser (2.5.0)
41
39
  rexml (3.2.5)
@@ -66,10 +64,10 @@ GEM
66
64
  parser (>= 3.1.1.0)
67
65
  ruby-progressbar (1.11.0)
68
66
  thor (1.2.1)
69
- typhoeus (1.4.0)
70
- ethon (>= 0.9.0)
67
+ typhoeus (1.4.1)
68
+ ethon (= 0.10.0)
71
69
  unicode-display_width (2.2.0)
72
- webrick (1.7.0)
70
+ webrick (1.8.1)
73
71
 
74
72
  PLATFORMS
75
73
  ruby
data/README.md CHANGED
@@ -294,6 +294,13 @@ Generate a gzipped tar file containing the HTML report instead of generating
294
294
  and serving live web pages, this option overrides `--report-format`, forcing
295
295
  HTML.
296
296
 
297
+ ```
298
+ sitediff diff --export
299
+ sitediff diff -e
300
+ ```
301
+
302
+ This will perform the diff and export the results in a gzipped tar file.
303
+
297
304
  ### Running inside containers
298
305
 
299
306
  If you run SiteDiff inside a container or virtual machine, the URLs in its
@@ -441,7 +448,7 @@ before comparison:
441
448
  dom_transform:
442
449
  # Remove current time block
443
450
  - type: remove
444
- - selector: div#block-time
451
+ selector: div#block-time
445
452
  ```
446
453
 
447
454
  #### strip
@@ -458,7 +465,7 @@ To transform `<h1> Foo and Bar\n </h1>` to `<h1>Foo and Bar<\h1>`:
458
465
  dom_transform:
459
466
  # Strip H1 tags
460
467
  - type: strip
461
- - selector: h1
468
+ selector: h1
462
469
  ```
463
470
 
464
471
  #### unwrap
@@ -655,7 +662,7 @@ EG:
655
662
  </div>
656
663
  </region>
657
664
  <region id="body">
658
- <div class=".field-name-attribution">
665
+ <div class="field-name-attribution">
659
666
  <p>Lorem ipsum...
660
667
  </div>
661
668
  </region>
data/lib/sitediff/api.rb CHANGED
@@ -160,12 +160,12 @@ class SiteDiff
160
160
  )
161
161
  @paths = {}
162
162
 
163
- ignoreAfter = @config.roots
163
+ ignore_after = @config.roots
164
164
  if @config.roots['before'] == @config.roots['after']
165
- ignoreAfter.delete('after')
165
+ ignore_after.delete('after')
166
166
  end
167
-
168
- ignoreAfter.each do |tag, url|
167
+
168
+ ignore_after.each do |tag, url|
169
169
  Crawler.new(
170
170
  hydra,
171
171
  url,
@@ -184,6 +184,10 @@ class SiteDiff
184
184
 
185
185
  # Write paths to a file.
186
186
  @paths = @paths.values.reduce(&:|).to_a.sort
187
+ if @paths.none? | @paths.nil?
188
+ return
189
+ end
190
+
187
191
  @config.paths_file_write(@paths)
188
192
 
189
193
  # Log output.
@@ -230,7 +234,7 @@ class SiteDiff
230
234
  @config.setting(:interval),
231
235
  @config.setting(:concurrency),
232
236
  get_curl_opts(@config.settings),
233
- options[:debug],
237
+ debug: options[:debug],
234
238
  before: base)
235
239
  fetcher.run do |path, _res|
236
240
  SiteDiff.log "Visited #{path}, cached"
@@ -43,7 +43,7 @@ class SiteDiff
43
43
 
44
44
  @found << rel
45
45
 
46
- wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer: referrer)
46
+ wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer:)
47
47
  wrapper.queue(@hydra) do |res|
48
48
  fetched_uri(rel, depth, res)
49
49
  end
@@ -96,6 +96,7 @@ class SiteDiff
96
96
 
97
97
  # Resolve a potentially-relative link. Return nil on error.
98
98
  def resolve_link(base, rel)
99
+ rel = rel.strip
99
100
  base + rel
100
101
  rescue Addressable::URI::InvalidURIError
101
102
  SiteDiff.log "skipped invalid URL: '#{rel}' (at #{base})", :warning
@@ -129,6 +130,7 @@ class SiteDiff
129
130
  u.path.start_with?(@base_uri.path)
130
131
  next unless is_sub_uri
131
132
 
133
+ # puts "Trying regex #{u.path}"
132
134
  is_included = @include_regex.nil? ? false : @include_regex.match(u.path)
133
135
  is_excluded = @exclude_regex.nil? ? false : @exclude_regex.match(u.path)
134
136
  if is_excluded && !is_included
@@ -4,6 +4,8 @@
4
4
 
5
5
  # Pages compared.
6
6
  compared_pages = results.length
7
+
8
+ url_hash = '?' + Time.now.strftime("%s%L")
7
9
  %>
8
10
  <!DOCTYPE html>
9
11
  <html>
@@ -134,17 +136,17 @@
134
136
  <div class="buttons">
135
137
  <% unless relative %>
136
138
  <% unless report['before_url_report'] === false %>
137
- <a href="<%= result.url(:before, before_url_report || before, cache) %>" class="button-before" target="_blank">Before</a>
139
+ <a href="<%= result.url(:before, before_url_report || before, cache) + url_hash %>" class="button-before" target="_blank">Before</a>
138
140
  <% end %>
139
141
  <% unless report['after_url_report'] === false %>
140
- <a href="<%= result.url(:after, after_url_report || after, cache) %>" class="button-after" target="_blank">After</a>
142
+ <a href="<%= result.url(:after, after_url_report || after, cache) + url_hash %>" class="button-after" target="_blank">After</a>
141
143
  <% end %>
142
144
  <% unless report['before_url_report'] === false || report['after_url_report'] === false %>
143
- <a href="/sidebyside<%= result.path %>" class="button-both">Both</a>
145
+ <a href="/sidebyside<%= result.path + url_hash %>" class="button-both">Both</a>
144
146
  <% end %>
145
147
  <% end %>
146
148
  <% unless result.diff_url.nil? %>
147
- <a href="<%= result.diff_url(relative: relative) %>" class="button button-diff">View diff</a>
149
+ <a href="<%= result.diff_url(relative: relative) + url_hash %>" class="button button-diff">View diff</a>
148
150
  <% end %>
149
151
  </div>
150
152
  </td>
@@ -29,7 +29,7 @@ sanitization:
29
29
  pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
30
30
  substitute: '\1'
31
31
  - title: Strip domain names from absolute URLs
32
- pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
32
+ pattern: 'https?:\/\/[a-zA-Z0-9.:-]+'
33
33
  substitute: '__domain__'
34
34
  - title: Strip form build ID
35
35
  selector: input
@@ -61,3 +61,7 @@ sanitization:
61
61
  selector: script
62
62
  pattern: 'js_[-\w]{40,43}\\?\.js'
63
63
  substitute: 'js__ID__.js'
64
+ dom_transform:
65
+ # Strip Drupal.settings (>8.0)
66
+ - type: remove
67
+ selector: 'script[data-drupal-selector="drupal-settings-json"]'
@@ -147,8 +147,8 @@ class SiteDiff
147
147
  temp_path.mkpath
148
148
  report_path = temp_path + REPORT_DIR
149
149
  report_path.mkpath
150
- files_path = report_path + "files"
151
- files_path.mkpath
150
+ files_path = "#{report_path}/files"
151
+ FileUtils.mkpath(files_path)
152
152
  diffs_path = dir + DIFFS_DIR
153
153
 
154
154
  # Move files to place.
@@ -220,10 +220,10 @@ class SiteDiff
220
220
 
221
221
  # Force this object to be a document, so we can apply a stylesheet
222
222
  def self.to_document(obj)
223
- if Nokogiri::XML::Document == obj.class || Nokogiri::HTML::Document == obj.class
223
+ if obj.instance_of?(Nokogiri::XML::Document) || obj.instance_of?(Nokogiri::HTML::Document)
224
224
  obj
225
225
  # node or fragment
226
- elsif Nokogiri::XML::Node == obj.class || Nokogiri::HTML::DocumentFragment == obj.class
226
+ elsif obj.instance_of?(Nokogiri::XML::Node) || obj.instance_of?(Nokogiri::HTML::DocumentFragment)
227
227
  domify(obj.to_s, force_doc: true)
228
228
  else
229
229
  to_document(domify(obj, force_doc: false))
@@ -119,6 +119,12 @@ class SiteDiff
119
119
  # Allow basic auth
120
120
  params[:userpwd] = "#{@uri.user}: #{@uri.password}" if @uri.user
121
121
 
122
+ # params['verbose'] = true
123
+ # params['ssl_verifypeer'] = false
124
+ # params['ssl_verifyhost'] = 0
125
+ # params['followlocation'] = true
126
+ # puts to_s
127
+
122
128
  req = Typhoeus::Request.new(to_s, params)
123
129
 
124
130
  req.on_success do |resp|
@@ -148,20 +154,20 @@ class SiteDiff
148
154
  end
149
155
  end
150
156
 
151
- req.on_failure do |resp|
157
+ req.on_failure do |resp|
152
158
  if resp&.status_message
153
159
  yield ReadResult.error(
154
- "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
160
+ "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From: #{@referrer}",
155
161
  resp.response_code
156
162
  )
157
163
  elsif (msg = resp.options[:return_code])
158
164
  yield ReadResult.error(
159
- "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From page: #{@referrer}",
165
+ "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From: #{@referrer}",
160
166
  resp.response_code
161
167
  )
162
168
  else
163
169
  yield ReadResult.error(
164
- "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
170
+ "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From: #{@referrer}",
165
171
  resp.response_code
166
172
  )
167
173
  end
data/lib/sitediff.rb CHANGED
@@ -137,7 +137,7 @@ class SiteDiff
137
137
  rescue StandardError => e
138
138
  raise if @debug
139
139
 
140
- Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
140
+ diff = Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e.message}")
141
141
  end
142
142
  end
143
143
  @results[path] = diff
data/package-lock.json CHANGED
@@ -588,9 +588,9 @@
588
588
  "dev": true
589
589
  },
590
590
  "minimatch": {
591
- "version": "3.0.4",
592
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
593
- "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
591
+ "version": "3.1.2",
592
+ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
593
+ "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
594
594
  "dev": true,
595
595
  "requires": {
596
596
  "brace-expansion": "^1.1.7"
data/sitediff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'sitediff'
5
- s.version = '1.2.1'
5
+ s.version = '1.2.4'
6
6
  s.required_ruby_version = '>= 3.1.2'
7
7
  s.summary = 'Compare two versions of a site with ease!'
8
8
  s.description = <<DESC
@@ -35,14 +35,14 @@ DESC
35
35
 
36
36
  s.add_dependency 'minitar', '~> 0.9'
37
37
  s.add_dependency 'thor', '~> 1.2.1'
38
- s.add_dependency 'typhoeus', '~> 1.4.0'
38
+ s.add_dependency 'typhoeus', '~> 1.4.1'
39
39
 
40
40
  # A bug in rubygems can break rainbow 2.2
41
41
  # https://github.com/bundler/bundler/issues/5357
42
42
  s.add_dependency 'rainbow', '~> 3.1.1'
43
43
 
44
44
  # Nokogiri 1.7 is not supported on Ruby 2.0.
45
- s.add_dependency 'nokogiri', '>= 1.13.6'
45
+ s.add_dependency 'nokogiri', '>= 1.14.2'
46
46
 
47
47
  # Diffy and addressable have a max version for Ruby 1.9.
48
48
  s.add_dependency 'addressable', '>= 2.5.2', '< 2.9.0'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dergachev
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-09-29 00:00:00.000000000 Z
13
+ date: 2023-05-14 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: pkg-config
@@ -60,14 +60,14 @@ dependencies:
60
60
  requirements:
61
61
  - - "~>"
62
62
  - !ruby/object:Gem::Version
63
- version: 1.4.0
63
+ version: 1.4.1
64
64
  type: :runtime
65
65
  prerelease: false
66
66
  version_requirements: !ruby/object:Gem::Requirement
67
67
  requirements:
68
68
  - - "~>"
69
69
  - !ruby/object:Gem::Version
70
- version: 1.4.0
70
+ version: 1.4.1
71
71
  - !ruby/object:Gem::Dependency
72
72
  name: rainbow
73
73
  requirement: !ruby/object:Gem::Requirement
@@ -88,14 +88,14 @@ dependencies:
88
88
  requirements:
89
89
  - - ">="
90
90
  - !ruby/object:Gem::Version
91
- version: 1.13.6
91
+ version: 1.14.2
92
92
  type: :runtime
93
93
  prerelease: false
94
94
  version_requirements: !ruby/object:Gem::Requirement
95
95
  requirements:
96
96
  - - ">="
97
97
  - !ruby/object:Gem::Version
98
- version: 1.13.6
98
+ version: 1.14.2
99
99
  - !ruby/object:Gem::Dependency
100
100
  name: addressable
101
101
  requirement: !ruby/object:Gem::Requirement
@@ -229,7 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
229
229
  - !ruby/object:Gem::Version
230
230
  version: '0'
231
231
  requirements: []
232
- rubygems_version: 3.3.7
232
+ rubygems_version: 3.4.8
233
233
  signing_key:
234
234
  specification_version: 4
235
235
  summary: Compare two versions of a site with ease!