sitediff 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2e37a67bb4f0d7b93f252940d4ee0e4e3184dc27a435626f995b70bad4a7fc40
4
- data.tar.gz: 692d0a82b230e2dbab10fe8f6ee8591ca0128998f4c9acd6b97a37a90856c887
3
+ metadata.gz: dff12d889984ec88ad662c2a0f2f3e0771b2a2c6cbc8e5f0442773ab36a51e7c
4
+ data.tar.gz: 96541e827d456c925677821c501297b68b284828584f35e499b4b35da75f962f
5
5
  SHA512:
6
- metadata.gz: 72551efe76eaa6a4a23aeacba6b25cb3bb8b4483d27299846f55ba7a7bad8e256f798de23db36f501a6739875494be808541834bfaadf587fbbd078b6fa62506
7
- data.tar.gz: 977634a139f70794aa5015e42e014e542391f5cf3c07bd34bff1f7128514051c341512d607f1cb9dfb0326c2bee0c80b2c6b4b119e21897adb5bf516fa7a3140
6
+ metadata.gz: 3044d99f7494697d817f4ab545308987dbcaebd007f531f9113c2298f3f1550952967f92f0223a96782efed424b4c2ca97123fb0fce20a382b955086afef3386
7
+ data.tar.gz: 7715c7285734dad676fe95cc4fc4b6cd69411a073a4c3bca9b839ff152dfa427b0b6655159e54fc7a6e82c6ac3c0c5c9c86f312c9c7d287fc5101ec6cff0d23b
data/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@
2
2
 
3
3
  Contains noteworthy changes made to SiteDiff.
4
4
 
5
+ ## Version 1.2.1
6
+ - Fixed a bug with report exporting.
7
+ - Prevented crawling the same site twice when the before and after URLs are the same.
8
+ - Added a referrer to the crawler errors.
9
+
5
10
  ## Version 1.2.0
6
11
  - Updated requirement to Ruby 3.1.2.
7
12
  - Upgraded modules for security and compatibility.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sitediff (1.2.0)
4
+ sitediff (1.2.1)
5
5
  addressable (>= 2.5.2, < 2.9.0)
6
6
  diffy (~> 3.4.0)
7
7
  minitar (~> 0.9)
data/INSTALLATION.md CHANGED
@@ -36,7 +36,7 @@ avoid using `sudo` for `gem install`.
36
36
 
37
37
  ```bash
38
38
  gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true --with-xml2-include=/usr/include/libxml2
39
- gem install sitediff -v '1.2.0'
39
+ gem install sitediff -v '1.2.1'
40
40
  ```
41
41
 
42
42
  ## Docker
@@ -88,7 +88,7 @@ If possible avoid using `sudo` for `gem install`.
88
88
 
89
89
  ```bash
90
90
  gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true --with-xml2-include=/usr/include/libxml2
91
- gem install sitediff -v '1.2.0'
91
+ gem install sitediff -v '1.2.1'
92
92
  ```
93
93
 
94
94
  ## Ubuntu
data/lib/sitediff/api.rb CHANGED
@@ -159,7 +159,13 @@ class SiteDiff
159
159
  max_concurrency: @config.setting(:concurrency)
160
160
  )
161
161
  @paths = {}
162
- @config.roots.each do |tag, url|
162
+
163
+ ignoreAfter = @config.roots
164
+ if @config.roots['before'] == @config.roots['after']
165
+ ignoreAfter.delete('after')
166
+ end
167
+
168
+ ignoreAfter.each do |tag, url|
163
169
  Crawler.new(
164
170
  hydra,
165
171
  url,
@@ -34,16 +34,16 @@ class SiteDiff
34
34
  @curl_opts = curl_opts
35
35
  @debug = debug
36
36
 
37
- add_uri('', depth)
37
+ add_uri('', depth, referrer: '/')
38
38
  end
39
39
 
40
40
  # Handle a newly found relative URI
41
- def add_uri(rel, depth)
41
+ def add_uri(rel, depth, referrer = '')
42
42
  return if @found.include? rel
43
43
 
44
44
  @found << rel
45
45
 
46
- wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug)
46
+ wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer: referrer)
47
47
  wrapper.queue(@hydra) do |res|
48
48
  fetched_uri(rel, depth, res)
49
49
  end
@@ -90,7 +90,7 @@ class SiteDiff
90
90
  rels.each do |r|
91
91
  next if @found.include? r
92
92
 
93
- add_uri(r, depth - 1)
93
+ add_uri(r, depth - 1, rel)
94
94
  end
95
95
  end
96
96
 
@@ -147,7 +147,7 @@ class SiteDiff
147
147
  temp_path.mkpath
148
148
  report_path = temp_path + REPORT_DIR
149
149
  report_path.mkpath
150
- files_path = "#{report_path}files"
150
+ files_path = report_path + "files"
151
151
  files_path.mkpath
152
152
  diffs_path = dir + DIFFS_DIR
153
153
 
@@ -48,12 +48,13 @@ class SiteDiff
48
48
 
49
49
  ##
50
50
  # Creates a UriWrapper.
51
- def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true)
51
+ def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true, referrer: '')
52
52
  @uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
53
53
  # remove trailing '/'s from local URIs
54
54
  @uri.path.gsub!(%r{/*$}, '') if local?
55
55
  @curl_opts = curl_opts
56
56
  @debug = debug
57
+ @referrer = referrer
57
58
  end
58
59
 
59
60
  ##
@@ -136,31 +137,31 @@ class SiteDiff
136
137
  raise if @debug
137
138
 
138
139
  yield ReadResult.error(
139
- "Parsing error for #{@uri}: #{e.message}"
140
+ "Parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
140
141
  )
141
142
  rescue StandardError => e
142
143
  raise if @debug
143
144
 
144
145
  yield ReadResult.error(
145
- "Unknown parsing error for #{@uri}: #{e.message}"
146
+ "Unknown parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
146
147
  )
147
148
  end
148
149
  end
149
150
 
150
- req.on_failure do |resp|
151
+ req.on_failure do |resp|
151
152
  if resp&.status_message
152
153
  yield ReadResult.error(
153
- "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
154
+ "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
154
155
  resp.response_code
155
156
  )
156
157
  elsif (msg = resp.options[:return_code])
157
158
  yield ReadResult.error(
158
- "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{resp.status_message} #{msg}",
159
+ "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From page: #{@referrer}",
159
160
  resp.response_code
160
161
  )
161
162
  else
162
163
  yield ReadResult.error(
163
- "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
164
+ "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
164
165
  resp.response_code
165
166
  )
166
167
  end
data/sitediff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'sitediff'
5
- s.version = '1.2.0'
5
+ s.version = '1.2.1'
6
6
  s.required_ruby_version = '>= 3.1.2'
7
7
  s.summary = 'Compare two versions of a site with ease!'
8
8
  s.description = <<DESC
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dergachev
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-08-29 00:00:00.000000000 Z
13
+ date: 2022-09-29 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: pkg-config