sitediff 1.2.1 → 1.2.5a

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dff12d889984ec88ad662c2a0f2f3e0771b2a2c6cbc8e5f0442773ab36a51e7c
4
- data.tar.gz: 96541e827d456c925677821c501297b68b284828584f35e499b4b35da75f962f
3
+ metadata.gz: 5dbab719d51bbf4922bbf90d806fab06801a8c29d1e0697655121a16634f84d7
4
+ data.tar.gz: cc42c00cd4d36393723bc28cb204851bdf86535cee30519d1cd9367c6f6e06b9
5
5
  SHA512:
6
- metadata.gz: 3044d99f7494697d817f4ab545308987dbcaebd007f531f9113c2298f3f1550952967f92f0223a96782efed424b4c2ca97123fb0fce20a382b955086afef3386
7
- data.tar.gz: 7715c7285734dad676fe95cc4fc4b6cd69411a073a4c3bca9b839ff152dfa427b0b6655159e54fc7a6e82c6ac3c0c5c9c86f312c9c7d287fc5101ec6cff0d23b
6
+ metadata.gz: ed9dfaad56b3a52761a6df4fee893d871931632cb32bc526574d0988ac5599ff09b352c785ed35f8a8717844438b65ef432f9c6a4258c756b3518a12c3b3f6da
7
+ data.tar.gz: 011c5c381479cbd7fc95b8a82de66708f503b649f6b840b32d1cb06b4e50471bb92eb34ee426f5b879863a949fe339a5d3f9e317f9286401b6666fa12de7d6ee
data/CHANGELOG.md CHANGED
@@ -1,6 +1,22 @@
1
1
  # SiteDiff Change Log
2
2
 
3
3
  Contains noteworthy changes made to SiteDiff.
4
+ ## Version 1.2.5
5
+ - Fix issue with whitespace in URLs.
6
+ - Updates for Drupal preset for Drupal 8, 9, 10.
7
+ - Bump nokogiri from 1.14.2 to 1.14.3
8
+ - Fix basic auth derived from URL syntax
9
+
10
+ ## Version 1.2.4
11
+ - Fix issue with 'store' command.
12
+
13
+ ## Version 1.2.3
14
+ - Fix issue with nil object during diff report generation.
15
+ - Update to export documentation.
16
+
17
+ ## Version 1.2.2
18
+ - Security update for Nokogiri.
19
+ - Minor code updates.
4
20
 
5
21
  ## Version 1.2.1
6
22
  - Fixed a bug with report exporting.
@@ -30,4 +46,4 @@ Contains noteworthy changes made to SiteDiff.
30
46
 
31
47
  ## Prior to 1.0.0
32
48
 
33
- Release notes were out of date, so only tracking changes since 1.0.0 here.
49
+ Release notes were out of date, so only tracking changes since 1.0.0 here.
data/Dockerfile CHANGED
@@ -10,10 +10,10 @@ ARG DEBIAN_FRONTEND=noninteractive
10
10
  # Our build requires rake
11
11
  # Install editors: vim, nano.
12
12
  RUN apt-get update
13
- RUN apt-get install -y apt-utils
14
- RUN apt-get install -y software-properties-common
15
- RUN apt-get install -y make pkg-config libxml2-dev libxslt-dev
16
- RUN apt-get install -y vim nano git
13
+ RUN apt-get install -y apt-utils \
14
+ software-properties-common \
15
+ make pkg-config libxml2-dev libxslt-dev \
16
+ vim nano git
17
17
 
18
18
  # Force nokogiri gem not to compile libxml2, it takes too long
19
19
  ENV NOKOGIRI_USE_SYSTEM_LIBRARIES 1
data/Gemfile.lock CHANGED
@@ -1,41 +1,39 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sitediff (1.2.1)
4
+ sitediff (1.2.5a)
5
5
  addressable (>= 2.5.2, < 2.9.0)
6
6
  diffy (~> 3.4.0)
7
7
  minitar (~> 0.9)
8
- nokogiri (>= 1.13.6)
8
+ nokogiri (>= 1.14.2)
9
9
  pkg-config (~> 1.4)
10
10
  rainbow (~> 3.1.1)
11
11
  thor (~> 1.2.1)
12
- typhoeus (~> 1.4.0)
12
+ typhoeus (>= 1.1.1)
13
13
  webrick (>= 1.7)
14
14
 
15
15
  GEM
16
16
  remote: https://rubygems.org/
17
17
  specs:
18
- addressable (2.8.0)
19
- public_suffix (>= 2.0.2, < 5.0)
18
+ addressable (2.8.1)
19
+ public_suffix (>= 2.0.2, < 6.0)
20
20
  ast (2.4.2)
21
21
  diff-lcs (1.5.0)
22
22
  diffy (3.4.2)
23
- ethon (0.15.0)
23
+ ethon (0.16.0)
24
24
  ffi (>= 1.15.0)
25
25
  ffi (1.15.5)
26
26
  fileutils (1.1.0)
27
27
  json (2.6.2)
28
- mini_portile2 (2.8.0)
29
28
  minitar (0.9)
30
- nokogiri (1.13.6)
31
- mini_portile2 (~> 2.8.0)
29
+ nokogiri (1.14.2-arm64-darwin)
32
30
  racc (~> 1.4)
33
31
  parallel (1.22.1)
34
32
  parser (3.1.2.0)
35
33
  ast (~> 2.4.1)
36
- pkg-config (1.4.7)
37
- public_suffix (4.0.7)
38
- racc (1.6.0)
34
+ pkg-config (1.5.1)
35
+ public_suffix (5.0.1)
36
+ racc (1.6.2)
39
37
  rainbow (3.1.1)
40
38
  regexp_parser (2.5.0)
41
39
  rexml (3.2.5)
@@ -66,13 +64,14 @@ GEM
66
64
  parser (>= 3.1.1.0)
67
65
  ruby-progressbar (1.11.0)
68
66
  thor (1.2.1)
69
- typhoeus (1.4.0)
70
- ethon (>= 0.9.0)
67
+ typhoeus (1.4.1)
68
+ ethon (= 0.16.0)
71
69
  unicode-display_width (2.2.0)
72
- webrick (1.7.0)
70
+ webrick (1.8.1)
73
71
 
74
72
  PLATFORMS
75
73
  ruby
74
+ x86_64-linux
76
75
 
77
76
  DEPENDENCIES
78
77
  fileutils (= 1.1.0)
data/README.md CHANGED
@@ -294,6 +294,13 @@ Generate a gzipped tar file containing the HTML report instead of generating
294
294
  and serving live web pages, this option overrides `--report-format`, forcing
295
295
  HTML.
296
296
 
297
+ ```
298
+ sitediff diff --export
299
+ sitediff diff -e
300
+ ```
301
+
302
+ This will perform the diff and export the results in a gzipped tar file.
303
+
297
304
  ### Running inside containers
298
305
 
299
306
  If you run SiteDiff inside a container or virtual machine, the URLs in its
@@ -441,7 +448,7 @@ before comparison:
441
448
  dom_transform:
442
449
  # Remove current time block
443
450
  - type: remove
444
- - selector: div#block-time
451
+ selector: div#block-time
445
452
  ```
446
453
 
447
454
  #### strip
@@ -458,7 +465,7 @@ To transform `<h1> Foo and Bar\n </h1>` to `<h1>Foo and Bar<\h1>`:
458
465
  dom_transform:
459
466
  # Strip H1 tags
460
467
  - type: strip
461
- - selector: h1
468
+ selector: h1
462
469
  ```
463
470
 
464
471
  #### unwrap
@@ -655,7 +662,7 @@ EG:
655
662
  </div>
656
663
  </region>
657
664
  <region id="body">
658
- <div class=".field-name-attribution">
665
+ <div class="field-name-attribution">
659
666
  <p>Lorem ipsum...
660
667
  </div>
661
668
  </region>
data/lib/sitediff/api.rb CHANGED
@@ -160,12 +160,12 @@ class SiteDiff
160
160
  )
161
161
  @paths = {}
162
162
 
163
- ignoreAfter = @config.roots
163
+ ignore_after = @config.roots
164
164
  if @config.roots['before'] == @config.roots['after']
165
- ignoreAfter.delete('after')
165
+ ignore_after.delete('after')
166
166
  end
167
-
168
- ignoreAfter.each do |tag, url|
167
+
168
+ ignore_after.each do |tag, url|
169
169
  Crawler.new(
170
170
  hydra,
171
171
  url,
@@ -184,6 +184,10 @@ class SiteDiff
184
184
 
185
185
  # Write paths to a file.
186
186
  @paths = @paths.values.reduce(&:|).to_a.sort
187
+ if @paths.none? | @paths.nil?
188
+ return
189
+ end
190
+
187
191
  @config.paths_file_write(@paths)
188
192
 
189
193
  # Log output.
@@ -230,7 +234,7 @@ class SiteDiff
230
234
  @config.setting(:interval),
231
235
  @config.setting(:concurrency),
232
236
  get_curl_opts(@config.settings),
233
- options[:debug],
237
+ debug: options[:debug],
234
238
  before: base)
235
239
  fetcher.run do |path, _res|
236
240
  SiteDiff.log "Visited #{path}, cached"
@@ -43,7 +43,7 @@ class SiteDiff
43
43
 
44
44
  @found << rel
45
45
 
46
- wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer: referrer)
46
+ wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer:)
47
47
  wrapper.queue(@hydra) do |res|
48
48
  fetched_uri(rel, depth, res)
49
49
  end
@@ -96,6 +96,7 @@ class SiteDiff
96
96
 
97
97
  # Resolve a potentially-relative link. Return nil on error.
98
98
  def resolve_link(base, rel)
99
+ rel = rel.strip
99
100
  base + rel
100
101
  rescue Addressable::URI::InvalidURIError
101
102
  SiteDiff.log "skipped invalid URL: '#{rel}' (at #{base})", :warning
@@ -129,6 +130,7 @@ class SiteDiff
129
130
  u.path.start_with?(@base_uri.path)
130
131
  next unless is_sub_uri
131
132
 
133
+ # puts "Trying regex #{u.path}"
132
134
  is_included = @include_regex.nil? ? false : @include_regex.match(u.path)
133
135
  is_excluded = @exclude_regex.nil? ? false : @exclude_regex.match(u.path)
134
136
  if is_excluded && !is_included
@@ -4,6 +4,8 @@
4
4
 
5
5
  # Pages compared.
6
6
  compared_pages = results.length
7
+
8
+ url_hash = '?' + Time.now.strftime("%s%L")
7
9
  %>
8
10
  <!DOCTYPE html>
9
11
  <html>
@@ -134,17 +136,17 @@
134
136
  <div class="buttons">
135
137
  <% unless relative %>
136
138
  <% unless report['before_url_report'] === false %>
137
- <a href="<%= result.url(:before, before_url_report || before, cache) %>" class="button-before" target="_blank">Before</a>
139
+ <a href="<%= result.url(:before, before_url_report || before, cache) + url_hash %>" class="button-before" target="_blank">Before</a>
138
140
  <% end %>
139
141
  <% unless report['after_url_report'] === false %>
140
- <a href="<%= result.url(:after, after_url_report || after, cache) %>" class="button-after" target="_blank">After</a>
142
+ <a href="<%= result.url(:after, after_url_report || after, cache) + url_hash %>" class="button-after" target="_blank">After</a>
141
143
  <% end %>
142
144
  <% unless report['before_url_report'] === false || report['after_url_report'] === false %>
143
- <a href="/sidebyside<%= result.path %>" class="button-both">Both</a>
145
+ <a href="/sidebyside<%= result.path + url_hash %>" class="button-both">Both</a>
144
146
  <% end %>
145
147
  <% end %>
146
148
  <% unless result.diff_url.nil? %>
147
- <a href="<%= result.diff_url(relative: relative) %>" class="button button-diff">View diff</a>
149
+ <a href="<%= result.diff_url(relative: relative) + url_hash %>" class="button button-diff">View diff</a>
148
150
  <% end %>
149
151
  </div>
150
152
  </td>
@@ -29,7 +29,7 @@ sanitization:
29
29
  pattern: '(src="[^"]*/misc/\w+\.js)?v=\d+\.\d+"'
30
30
  substitute: '\1'
31
31
  - title: Strip domain names from absolute URLs
32
- pattern: 'http:\/\/[a-zA-Z0-9.:-]+'
32
+ pattern: 'https?:\/\/[a-zA-Z0-9.:-]+'
33
33
  substitute: '__domain__'
34
34
  - title: Strip form build ID
35
35
  selector: input
@@ -61,3 +61,27 @@ sanitization:
61
61
  selector: script
62
62
  pattern: 'js_[-\w]{40,43}\\?\.js'
63
63
  substitute: 'js__ID__.js'
64
+ - pattern: ' data-contextual-token="[^\"]*"'
65
+ selector: 'div'
66
+ substitute: ''
67
+ - pattern: ' data-drupal-selector="[^\"]*"'
68
+ selector: 'div'
69
+ substitute: ''
70
+ - pattern: ' title="[^\"]*"'
71
+ selector: 'input'
72
+ substitute: ''
73
+ - pattern: 'js-view-dom-id-[a-zA-Z0-9]+'
74
+ selector: 'div'
75
+ substitute: 'js-view-dom-id-__HASH__'
76
+ - pattern: 'value="[^\"]*"'
77
+ selector: 'input'
78
+ substitute: 'value="HASH"'
79
+ - pattern: 'itok=[A-Za-z0-9\-_]+'
80
+ substitute: ''
81
+ - pattern: 'hash=[^"]+"'
82
+ selector: 'iframe'
83
+ substitute: 'hash=HASH'
84
+ dom_transform:
85
+ # Strip Drupal.settings (>8.0)
86
+ - type: remove
87
+ selector: 'script[data-drupal-selector="drupal-settings-json"]'
@@ -147,8 +147,8 @@ class SiteDiff
147
147
  temp_path.mkpath
148
148
  report_path = temp_path + REPORT_DIR
149
149
  report_path.mkpath
150
- files_path = report_path + "files"
151
- files_path.mkpath
150
+ files_path = "#{report_path}/files"
151
+ FileUtils.mkpath(files_path)
152
152
  diffs_path = dir + DIFFS_DIR
153
153
 
154
154
  # Move files to place.
@@ -220,10 +220,10 @@ class SiteDiff
220
220
 
221
221
  # Force this object to be a document, so we can apply a stylesheet
222
222
  def self.to_document(obj)
223
- if Nokogiri::XML::Document == obj.class || Nokogiri::HTML::Document == obj.class
223
+ if obj.instance_of?(Nokogiri::XML::Document) || obj.instance_of?(Nokogiri::HTML::Document)
224
224
  obj
225
225
  # node or fragment
226
- elsif Nokogiri::XML::Node == obj.class || Nokogiri::HTML::DocumentFragment == obj.class
226
+ elsif obj.instance_of?(Nokogiri::XML::Node) || obj.instance_of?(Nokogiri::HTML::DocumentFragment)
227
227
  domify(obj.to_s, force_doc: true)
228
228
  else
229
229
  to_document(domify(obj, force_doc: false))
@@ -117,7 +117,13 @@ class SiteDiff
117
117
  def typhoeus_request
118
118
  params = @curl_opts.dup
119
119
  # Allow basic auth
120
- params[:userpwd] = "#{@uri.user}: #{@uri.password}" if @uri.user
120
+ params[:userpwd] = "#{@uri.user}:#{@uri.password}" if @uri.user
121
+
122
+ # params['verbose'] = true
123
+ # params['ssl_verifypeer'] = false
124
+ # params['ssl_verifyhost'] = 0
125
+ # params['followlocation'] = true
126
+ # puts to_s
121
127
 
122
128
  req = Typhoeus::Request.new(to_s, params)
123
129
 
@@ -148,20 +154,20 @@ class SiteDiff
148
154
  end
149
155
  end
150
156
 
151
- req.on_failure do |resp|
157
+ req.on_failure do |resp|
152
158
  if resp&.status_message
153
159
  yield ReadResult.error(
154
- "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
160
+ "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From: #{@referrer}",
155
161
  resp.response_code
156
162
  )
157
163
  elsif (msg = resp.options[:return_code])
158
164
  yield ReadResult.error(
159
- "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From page: #{@referrer}",
165
+ "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From: #{@referrer}",
160
166
  resp.response_code
161
167
  )
162
168
  else
163
169
  yield ReadResult.error(
164
- "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
170
+ "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From: #{@referrer}",
165
171
  resp.response_code
166
172
  )
167
173
  end
data/lib/sitediff.rb CHANGED
@@ -137,7 +137,7 @@ class SiteDiff
137
137
  rescue StandardError => e
138
138
  raise if @debug
139
139
 
140
- Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e}")
140
+ diff = Result.new(path, nil, nil, nil, nil, "Sanitization error: #{e.message}")
141
141
  end
142
142
  end
143
143
  @results[path] = diff
data/package-lock.json CHANGED
@@ -588,9 +588,9 @@
588
588
  "dev": true
589
589
  },
590
590
  "minimatch": {
591
- "version": "3.0.4",
592
- "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
593
- "integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
591
+ "version": "3.1.2",
592
+ "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
593
+ "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
594
594
  "dev": true,
595
595
  "requires": {
596
596
  "brace-expansion": "^1.1.7"
data/sitediff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'sitediff'
5
- s.version = '1.2.1'
5
+ s.version = '1.2.5a'
6
6
  s.required_ruby_version = '>= 3.1.2'
7
7
  s.summary = 'Compare two versions of a site with ease!'
8
8
  s.description = <<DESC
@@ -35,14 +35,14 @@ DESC
35
35
 
36
36
  s.add_dependency 'minitar', '~> 0.9'
37
37
  s.add_dependency 'thor', '~> 1.2.1'
38
- s.add_dependency 'typhoeus', '~> 1.4.0'
38
+ s.add_dependency 'typhoeus', '>= 1.1.1'
39
39
 
40
40
  # A bug in rubygems can break rainbow 2.2
41
41
  # https://github.com/bundler/bundler/issues/5357
42
42
  s.add_dependency 'rainbow', '~> 3.1.1'
43
43
 
44
44
  # Nokogiri 1.7 is not supported on Ruby 2.0.
45
- s.add_dependency 'nokogiri', '>= 1.13.6'
45
+ s.add_dependency 'nokogiri', '>= 1.14.2'
46
46
 
47
47
  # Diffy and addressable have a max version for Ruby 1.9.
48
48
  s.add_dependency 'addressable', '>= 2.5.2', '< 2.9.0'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.2.5a
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dergachev
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-09-29 00:00:00.000000000 Z
13
+ date: 2023-05-14 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: pkg-config
@@ -58,16 +58,16 @@ dependencies:
58
58
  name: typhoeus
59
59
  requirement: !ruby/object:Gem::Requirement
60
60
  requirements:
61
- - - "~>"
61
+ - - ">="
62
62
  - !ruby/object:Gem::Version
63
- version: 1.4.0
63
+ version: 1.1.1
64
64
  type: :runtime
65
65
  prerelease: false
66
66
  version_requirements: !ruby/object:Gem::Requirement
67
67
  requirements:
68
- - - "~>"
68
+ - - ">="
69
69
  - !ruby/object:Gem::Version
70
- version: 1.4.0
70
+ version: 1.1.1
71
71
  - !ruby/object:Gem::Dependency
72
72
  name: rainbow
73
73
  requirement: !ruby/object:Gem::Requirement
@@ -88,14 +88,14 @@ dependencies:
88
88
  requirements:
89
89
  - - ">="
90
90
  - !ruby/object:Gem::Version
91
- version: 1.13.6
91
+ version: 1.14.2
92
92
  type: :runtime
93
93
  prerelease: false
94
94
  version_requirements: !ruby/object:Gem::Requirement
95
95
  requirements:
96
96
  - - ">="
97
97
  - !ruby/object:Gem::Version
98
- version: 1.13.6
98
+ version: 1.14.2
99
99
  - !ruby/object:Gem::Dependency
100
100
  name: addressable
101
101
  requirement: !ruby/object:Gem::Requirement
@@ -225,11 +225,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
225
225
  version: 3.1.2
226
226
  required_rubygems_version: !ruby/object:Gem::Requirement
227
227
  requirements:
228
- - - ">="
228
+ - - ">"
229
229
  - !ruby/object:Gem::Version
230
- version: '0'
230
+ version: 1.3.1
231
231
  requirements: []
232
- rubygems_version: 3.3.7
232
+ rubygems_version: 3.4.13
233
233
  signing_key:
234
234
  specification_version: 4
235
235
  summary: Compare two versions of a site with ease!