sitediff 1.2.0 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2e37a67bb4f0d7b93f252940d4ee0e4e3184dc27a435626f995b70bad4a7fc40
4
- data.tar.gz: 692d0a82b230e2dbab10fe8f6ee8591ca0128998f4c9acd6b97a37a90856c887
3
+ metadata.gz: dff12d889984ec88ad662c2a0f2f3e0771b2a2c6cbc8e5f0442773ab36a51e7c
4
+ data.tar.gz: 96541e827d456c925677821c501297b68b284828584f35e499b4b35da75f962f
5
5
  SHA512:
6
- metadata.gz: 72551efe76eaa6a4a23aeacba6b25cb3bb8b4483d27299846f55ba7a7bad8e256f798de23db36f501a6739875494be808541834bfaadf587fbbd078b6fa62506
7
- data.tar.gz: 977634a139f70794aa5015e42e014e542391f5cf3c07bd34bff1f7128514051c341512d607f1cb9dfb0326c2bee0c80b2c6b4b119e21897adb5bf516fa7a3140
6
+ metadata.gz: 3044d99f7494697d817f4ab545308987dbcaebd007f531f9113c2298f3f1550952967f92f0223a96782efed424b4c2ca97123fb0fce20a382b955086afef3386
7
+ data.tar.gz: 7715c7285734dad676fe95cc4fc4b6cd69411a073a4c3bca9b839ff152dfa427b0b6655159e54fc7a6e82c6ac3c0c5c9c86f312c9c7d287fc5101ec6cff0d23b
data/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@
2
2
 
3
3
  Contains noteworthy changes made to SiteDiff.
4
4
 
5
+ ## Version 1.2.1
6
+ - Fixed a bug with report exporting.
7
+ - Prevented crawling the same site twice when the before and after URLs are the same.
8
+ - Added a referrer to the crawler error messages.
9
+
5
10
  ## Version 1.2.0
6
11
  - Updated requirement to Ruby 3.1.2.
7
12
  - Upgraded modules for security and compatibility.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- sitediff (1.2.0)
4
+ sitediff (1.2.1)
5
5
  addressable (>= 2.5.2, < 2.9.0)
6
6
  diffy (~> 3.4.0)
7
7
  minitar (~> 0.9)
data/INSTALLATION.md CHANGED
@@ -36,7 +36,7 @@ avoid using `sudo` for `gem install`.
36
36
 
37
37
  ```bash
38
38
  gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true --with-xml2-include=/usr/include/libxml2
39
- gem install sitediff -v '1.2.0'
39
+ gem install sitediff -v '1.2.1'
40
40
  ```
41
41
 
42
42
  ## Docker
@@ -88,7 +88,7 @@ If possible avoid using `sudo` for `gem install`.
88
88
 
89
89
  ```bash
90
90
  gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true --with-xml2-include=/usr/include/libxml2
91
- gem install sitediff -v '1.2.0'
91
+ gem install sitediff -v '1.2.1'
92
92
  ```
93
93
 
94
94
  ## Ubuntu
data/lib/sitediff/api.rb CHANGED
@@ -159,7 +159,13 @@ class SiteDiff
159
159
  max_concurrency: @config.setting(:concurrency)
160
160
  )
161
161
  @paths = {}
162
- @config.roots.each do |tag, url|
162
+
163
+ ignoreAfter = @config.roots
164
+ if @config.roots['before'] == @config.roots['after']
165
+ ignoreAfter.delete('after')
166
+ end
167
+
168
+ ignoreAfter.each do |tag, url|
163
169
  Crawler.new(
164
170
  hydra,
165
171
  url,
@@ -34,16 +34,16 @@ class SiteDiff
34
34
  @curl_opts = curl_opts
35
35
  @debug = debug
36
36
 
37
- add_uri('', depth)
37
+ add_uri('', depth, referrer: '/')
38
38
  end
39
39
 
40
40
  # Handle a newly found relative URI
41
- def add_uri(rel, depth)
41
+ def add_uri(rel, depth, referrer = '')
42
42
  return if @found.include? rel
43
43
 
44
44
  @found << rel
45
45
 
46
- wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug)
46
+ wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer: referrer)
47
47
  wrapper.queue(@hydra) do |res|
48
48
  fetched_uri(rel, depth, res)
49
49
  end
@@ -90,7 +90,7 @@ class SiteDiff
90
90
  rels.each do |r|
91
91
  next if @found.include? r
92
92
 
93
- add_uri(r, depth - 1)
93
+ add_uri(r, depth - 1, rel)
94
94
  end
95
95
  end
96
96
 
@@ -147,7 +147,7 @@ class SiteDiff
147
147
  temp_path.mkpath
148
148
  report_path = temp_path + REPORT_DIR
149
149
  report_path.mkpath
150
- files_path = "#{report_path}files"
150
+ files_path = report_path + "files"
151
151
  files_path.mkpath
152
152
  diffs_path = dir + DIFFS_DIR
153
153
 
@@ -48,12 +48,13 @@ class SiteDiff
48
48
 
49
49
  ##
50
50
  # Creates a UriWrapper.
51
- def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true)
51
+ def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true, referrer: '')
52
52
  @uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
53
53
  # remove trailing '/'s from local URIs
54
54
  @uri.path.gsub!(%r{/*$}, '') if local?
55
55
  @curl_opts = curl_opts
56
56
  @debug = debug
57
+ @referrer = referrer
57
58
  end
58
59
 
59
60
  ##
@@ -136,31 +137,31 @@ class SiteDiff
136
137
  raise if @debug
137
138
 
138
139
  yield ReadResult.error(
139
- "Parsing error for #{@uri}: #{e.message}"
140
+ "Parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
140
141
  )
141
142
  rescue StandardError => e
142
143
  raise if @debug
143
144
 
144
145
  yield ReadResult.error(
145
- "Unknown parsing error for #{@uri}: #{e.message}"
146
+ "Unknown parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
146
147
  )
147
148
  end
148
149
  end
149
150
 
150
- req.on_failure do |resp|
151
+ req.on_failure do |resp|
151
152
  if resp&.status_message
152
153
  yield ReadResult.error(
153
- "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
154
+ "HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
154
155
  resp.response_code
155
156
  )
156
157
  elsif (msg = resp.options[:return_code])
157
158
  yield ReadResult.error(
158
- "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{resp.status_message} #{msg}",
159
+ "Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From page: #{@referrer}",
159
160
  resp.response_code
160
161
  )
161
162
  else
162
163
  yield ReadResult.error(
163
- "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
164
+ "Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
164
165
  resp.response_code
165
166
  )
166
167
  end
data/sitediff.gemspec CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'sitediff'
5
- s.version = '1.2.0'
5
+ s.version = '1.2.1'
6
6
  s.required_ruby_version = '>= 3.1.2'
7
7
  s.summary = 'Compare two versions of a site with ease!'
8
8
  s.description = <<DESC
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitediff
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dergachev
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-08-29 00:00:00.000000000 Z
13
+ date: 2022-09-29 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: pkg-config