sitediff 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/INSTALLATION.md +2 -2
- data/lib/sitediff/api.rb +7 -1
- data/lib/sitediff/crawler.rb +4 -4
- data/lib/sitediff/report.rb +1 -1
- data/lib/sitediff/uriwrapper.rb +8 -7
- data/sitediff.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dff12d889984ec88ad662c2a0f2f3e0771b2a2c6cbc8e5f0442773ab36a51e7c
|
4
|
+
data.tar.gz: 96541e827d456c925677821c501297b68b284828584f35e499b4b35da75f962f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3044d99f7494697d817f4ab545308987dbcaebd007f531f9113c2298f3f1550952967f92f0223a96782efed424b4c2ca97123fb0fce20a382b955086afef3386
|
7
|
+
data.tar.gz: 7715c7285734dad676fe95cc4fc4b6cd69411a073a4c3bca9b839ff152dfa427b0b6655159e54fc7a6e82c6ac3c0c5c9c86f312c9c7d287fc5101ec6cff0d23b
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,11 @@
|
|
2
2
|
|
3
3
|
Contains noteworthy changes made to SiteDiff.
|
4
4
|
|
5
|
+
## Version 1.2.1
|
6
|
+
- Fixed a bug with report exporting.
|
7
|
+
- Prevents crawling the same site twice if the before and after urls are the same.
|
8
|
+
- Adding a referrer to the crawler errors.
|
9
|
+
|
5
10
|
## Version 1.2.0
|
6
11
|
- Updated requirement to Ruby 3.1.2.
|
7
12
|
- Upgraded modules for security and compatibility.
|
data/Gemfile.lock
CHANGED
data/INSTALLATION.md
CHANGED
@@ -36,7 +36,7 @@ avoid using `sudo` for `gem install`.
|
|
36
36
|
|
37
37
|
```bash
|
38
38
|
gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true —with-xml2-include=/usr/include/libxml2
|
39
|
-
gem install sitediff -v '1.2.0'
|
39
|
+
gem install sitediff -v '1.2.1'
|
40
40
|
```
|
41
41
|
|
42
42
|
## Docker
|
@@ -88,7 +88,7 @@ If possible avoid using `sudo` for `gem install`.
|
|
88
88
|
|
89
89
|
```bash
|
90
90
|
gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true —with-xml2-include=/usr/include/libxml2
|
91
|
-
gem install sitediff -v '1.2.0'
|
91
|
+
gem install sitediff -v '1.2.1'
|
92
92
|
```
|
93
93
|
|
94
94
|
## Ubuntu
|
data/lib/sitediff/api.rb
CHANGED
@@ -159,7 +159,13 @@ class SiteDiff
|
|
159
159
|
max_concurrency: @config.setting(:concurrency)
|
160
160
|
)
|
161
161
|
@paths = {}
|
162
|
-
|
162
|
+
|
163
|
+
ignoreAfter = @config.roots
|
164
|
+
if @config.roots['before'] == @config.roots['after']
|
165
|
+
ignoreAfter.delete('after')
|
166
|
+
end
|
167
|
+
|
168
|
+
ignoreAfter.each do |tag, url|
|
163
169
|
Crawler.new(
|
164
170
|
hydra,
|
165
171
|
url,
|
data/lib/sitediff/crawler.rb
CHANGED
@@ -34,16 +34,16 @@ class SiteDiff
|
|
34
34
|
@curl_opts = curl_opts
|
35
35
|
@debug = debug
|
36
36
|
|
37
|
-
add_uri('', depth)
|
37
|
+
add_uri('', depth, referrer: '/')
|
38
38
|
end
|
39
39
|
|
40
40
|
# Handle a newly found relative URI
|
41
|
-
def add_uri(rel, depth)
|
41
|
+
def add_uri(rel, depth, referrer = '')
|
42
42
|
return if @found.include? rel
|
43
43
|
|
44
44
|
@found << rel
|
45
45
|
|
46
|
-
wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug)
|
46
|
+
wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer: referrer)
|
47
47
|
wrapper.queue(@hydra) do |res|
|
48
48
|
fetched_uri(rel, depth, res)
|
49
49
|
end
|
@@ -90,7 +90,7 @@ class SiteDiff
|
|
90
90
|
rels.each do |r|
|
91
91
|
next if @found.include? r
|
92
92
|
|
93
|
-
add_uri(r, depth - 1)
|
93
|
+
add_uri(r, depth - 1, rel)
|
94
94
|
end
|
95
95
|
end
|
96
96
|
|
data/lib/sitediff/report.rb
CHANGED
data/lib/sitediff/uriwrapper.rb
CHANGED
@@ -48,12 +48,13 @@ class SiteDiff
|
|
48
48
|
|
49
49
|
##
|
50
50
|
# Creates a UriWrapper.
|
51
|
-
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true)
|
51
|
+
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true, referrer: '')
|
52
52
|
@uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
|
53
53
|
# remove trailing '/'s from local URIs
|
54
54
|
@uri.path.gsub!(%r{/*$}, '') if local?
|
55
55
|
@curl_opts = curl_opts
|
56
56
|
@debug = debug
|
57
|
+
@referrer = referrer
|
57
58
|
end
|
58
59
|
|
59
60
|
##
|
@@ -136,31 +137,31 @@ class SiteDiff
|
|
136
137
|
raise if @debug
|
137
138
|
|
138
139
|
yield ReadResult.error(
|
139
|
-
"Parsing error for #{@uri}: #{e.message}"
|
140
|
+
"Parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
|
140
141
|
)
|
141
142
|
rescue StandardError => e
|
142
143
|
raise if @debug
|
143
144
|
|
144
145
|
yield ReadResult.error(
|
145
|
-
"Unknown parsing error for #{@uri}: #{e.message}"
|
146
|
+
"Unknown parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
|
146
147
|
)
|
147
148
|
end
|
148
149
|
end
|
149
150
|
|
150
|
-
req.on_failure do |resp|
|
151
|
+
req.on_failure do |resp|
|
151
152
|
if resp&.status_message
|
152
153
|
yield ReadResult.error(
|
153
|
-
"HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
|
154
|
+
"HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
|
154
155
|
resp.response_code
|
155
156
|
)
|
156
157
|
elsif (msg = resp.options[:return_code])
|
157
158
|
yield ReadResult.error(
|
158
|
-
"Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg}",
|
159
|
+
"Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From page: #{@referrer}",
|
159
160
|
resp.response_code
|
160
161
|
)
|
161
162
|
else
|
162
163
|
yield ReadResult.error(
|
163
|
-
"Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
|
164
|
+
"Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
|
164
165
|
resp.response_code
|
165
166
|
)
|
166
167
|
end
|
data/sitediff.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitediff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alex Dergachev
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2022-
|
13
|
+
date: 2022-09-29 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: pkg-config
|