sitediff 1.2.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/INSTALLATION.md +2 -2
- data/lib/sitediff/api.rb +7 -1
- data/lib/sitediff/crawler.rb +4 -4
- data/lib/sitediff/report.rb +1 -1
- data/lib/sitediff/uriwrapper.rb +8 -7
- data/sitediff.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dff12d889984ec88ad662c2a0f2f3e0771b2a2c6cbc8e5f0442773ab36a51e7c
|
4
|
+
data.tar.gz: 96541e827d456c925677821c501297b68b284828584f35e499b4b35da75f962f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3044d99f7494697d817f4ab545308987dbcaebd007f531f9113c2298f3f1550952967f92f0223a96782efed424b4c2ca97123fb0fce20a382b955086afef3386
|
7
|
+
data.tar.gz: 7715c7285734dad676fe95cc4fc4b6cd69411a073a4c3bca9b839ff152dfa427b0b6655159e54fc7a6e82c6ac3c0c5c9c86f312c9c7d287fc5101ec6cff0d23b
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,11 @@
|
|
2
2
|
|
3
3
|
Contains noteworthy changes made to SiteDiff.
|
4
4
|
|
5
|
+
## Version 1.2.1
|
6
|
+
- Fixed a bug with report exporting.
|
7
|
+
- Prevents crawling the same site twice if the before and after urls are the same.
|
8
|
+
- Adding a referrer to the crawler errors.
|
9
|
+
|
5
10
|
## Version 1.2.0
|
6
11
|
- Updated requirement to Ruby 3.1.2.
|
7
12
|
- Upgraded modules for security and compatibility.
|
data/Gemfile.lock
CHANGED
data/INSTALLATION.md
CHANGED
@@ -36,7 +36,7 @@ avoid using `sudo` for `gem install`.
|
|
36
36
|
|
37
37
|
```bash
|
38
38
|
gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true —with-xml2-include=/usr/include/libxml2
|
39
|
-
gem install sitediff -v '1.2.0'
|
39
|
+
gem install sitediff -v '1.2.1'
|
40
40
|
```
|
41
41
|
|
42
42
|
## Docker
|
@@ -88,7 +88,7 @@ If possible avoid using `sudo` for `gem install`.
|
|
88
88
|
|
89
89
|
```bash
|
90
90
|
gem install nokogiri --no-rdoc --no-ri -- --use-system-libraries=true —with-xml2-include=/usr/include/libxml2
|
91
|
-
gem install sitediff -v '1.2.0'
|
91
|
+
gem install sitediff -v '1.2.1'
|
92
92
|
```
|
93
93
|
|
94
94
|
## Ubuntu
|
data/lib/sitediff/api.rb
CHANGED
@@ -159,7 +159,13 @@ class SiteDiff
|
|
159
159
|
max_concurrency: @config.setting(:concurrency)
|
160
160
|
)
|
161
161
|
@paths = {}
|
162
|
-
|
162
|
+
|
163
|
+
ignoreAfter = @config.roots
|
164
|
+
if @config.roots['before'] == @config.roots['after']
|
165
|
+
ignoreAfter.delete('after')
|
166
|
+
end
|
167
|
+
|
168
|
+
ignoreAfter.each do |tag, url|
|
163
169
|
Crawler.new(
|
164
170
|
hydra,
|
165
171
|
url,
|
data/lib/sitediff/crawler.rb
CHANGED
@@ -34,16 +34,16 @@ class SiteDiff
|
|
34
34
|
@curl_opts = curl_opts
|
35
35
|
@debug = debug
|
36
36
|
|
37
|
-
add_uri('', depth)
|
37
|
+
add_uri('', depth, referrer: '/')
|
38
38
|
end
|
39
39
|
|
40
40
|
# Handle a newly found relative URI
|
41
|
-
def add_uri(rel, depth)
|
41
|
+
def add_uri(rel, depth, referrer = '')
|
42
42
|
return if @found.include? rel
|
43
43
|
|
44
44
|
@found << rel
|
45
45
|
|
46
|
-
wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug)
|
46
|
+
wrapper = UriWrapper.new(@base + rel, @curl_opts, debug: @debug, referrer: referrer)
|
47
47
|
wrapper.queue(@hydra) do |res|
|
48
48
|
fetched_uri(rel, depth, res)
|
49
49
|
end
|
@@ -90,7 +90,7 @@ class SiteDiff
|
|
90
90
|
rels.each do |r|
|
91
91
|
next if @found.include? r
|
92
92
|
|
93
|
-
add_uri(r, depth - 1)
|
93
|
+
add_uri(r, depth - 1, rel)
|
94
94
|
end
|
95
95
|
end
|
96
96
|
|
data/lib/sitediff/report.rb
CHANGED
data/lib/sitediff/uriwrapper.rb
CHANGED
@@ -48,12 +48,13 @@ class SiteDiff
|
|
48
48
|
|
49
49
|
##
|
50
50
|
# Creates a UriWrapper.
|
51
|
-
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true)
|
51
|
+
def initialize(uri, curl_opts = DEFAULT_CURL_OPTS, debug: true, referrer: '')
|
52
52
|
@uri = uri.respond_to?(:scheme) ? uri : Addressable::URI.parse(uri)
|
53
53
|
# remove trailing '/'s from local URIs
|
54
54
|
@uri.path.gsub!(%r{/*$}, '') if local?
|
55
55
|
@curl_opts = curl_opts
|
56
56
|
@debug = debug
|
57
|
+
@referrer = referrer
|
57
58
|
end
|
58
59
|
|
59
60
|
##
|
@@ -136,31 +137,31 @@ class SiteDiff
|
|
136
137
|
raise if @debug
|
137
138
|
|
138
139
|
yield ReadResult.error(
|
139
|
-
"Parsing error for #{@uri}: #{e.message}"
|
140
|
+
"Parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
|
140
141
|
)
|
141
142
|
rescue StandardError => e
|
142
143
|
raise if @debug
|
143
144
|
|
144
145
|
yield ReadResult.error(
|
145
|
-
"Unknown parsing error for #{@uri}: #{e.message}"
|
146
|
+
"Unknown parsing error for #{@uri}: #{e.message} From page: #{@referrer}"
|
146
147
|
)
|
147
148
|
end
|
148
149
|
end
|
149
150
|
|
150
|
-
req.on_failure do |resp|
|
151
|
+
req.on_failure do |resp|
|
151
152
|
if resp&.status_message
|
152
153
|
yield ReadResult.error(
|
153
|
-
"HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
|
154
|
+
"HTTP error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
|
154
155
|
resp.response_code
|
155
156
|
)
|
156
157
|
elsif (msg = resp.options[:return_code])
|
157
158
|
yield ReadResult.error(
|
158
|
-
"Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg}",
|
159
|
+
"Connection error when loading #{@uri} : [#{resp.options[:return_code]}] #{msg} From page: #{@referrer}",
|
159
160
|
resp.response_code
|
160
161
|
)
|
161
162
|
else
|
162
163
|
yield ReadResult.error(
|
163
|
-
"Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message}",
|
164
|
+
"Unknown error when loading #{@uri} : [#{resp.response_code}] #{resp.status_message} From page: #{@referrer}",
|
164
165
|
resp.response_code
|
165
166
|
)
|
166
167
|
end
|
data/sitediff.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitediff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alex Dergachev
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2022-
|
13
|
+
date: 2022-09-29 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: pkg-config
|