vore 0.2.4-arm64-darwin → 0.2.6-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/vore-spider +0 -0
- data/lib/vore/crawler.rb +14 -4
- data/lib/vore/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 402d817b4979ae3cb7caee99653fc2ddfccf726b53f49c76dc078d54a9868af3
|
4
|
+
data.tar.gz: 1768208e84f98fbbcb5e8bb683528f6f476c928d79c1e006cb1b6a8bf521cae7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0091d5f55c923e91abfa57b01e7857072196c85e1053568b5799c715c6d188a27075a82c9393e9fdadade34ff02832c98780dcf9e1f70970f45db29a77fbc733'
|
7
|
+
data.tar.gz: 5915151dbdd4d055f5cf6305dada2a0a82d817a8563ddffab52582f6c580903a2f29269b81c3591c407775ab522d66e55c5a901d3afb4640aa0e959c56f88e2c
|
data/exe/vore-spider
CHANGED
Binary file
|
data/lib/vore/crawler.rb
CHANGED
@@ -31,7 +31,7 @@ module Vore
|
|
31
31
|
|
32
32
|
output = %x(#{@executable} \
|
33
33
|
--user-agent #{user_agent} \
|
34
|
-
--delay
|
34
|
+
--delay 3500 \
|
35
35
|
--url #{website} \
|
36
36
|
download \
|
37
37
|
-t \
|
@@ -48,16 +48,26 @@ module Vore
|
|
48
48
|
Dir.glob(File.join(output_dir, "**", "*")).each do |path|
|
49
49
|
next unless File.file?(path)
|
50
50
|
|
51
|
+
results[:pages_visited] += 1
|
52
|
+
|
51
53
|
html_file = File.read(path).force_encoding("UTF-8")
|
52
|
-
rewritten_html_file =
|
54
|
+
rewritten_html_file = ""
|
53
55
|
|
54
|
-
|
55
|
-
|
56
|
+
if html_file.empty?
|
57
|
+
Vore.logger.warn("HTML file empty: #{path}")
|
56
58
|
results[:pages_unprocessed] += 1
|
57
59
|
results[:unprocessed_pages] << path
|
58
60
|
next
|
59
61
|
end
|
60
62
|
|
63
|
+
begin
|
64
|
+
rewritten_html_file = @selma.rewrite(html_file)
|
65
|
+
rescue StandardError => e
|
66
|
+
Vore.logger.warn("Error rewriting #{path}: #{e}")
|
67
|
+
results[:pages_unprocessed] += 1
|
68
|
+
next
|
69
|
+
end
|
70
|
+
|
61
71
|
# drops the first 3 parts of the path, which are "tmp", "vore", and the site name
|
62
72
|
url_path = path.split(FILE_SEPERATOR)[3..].join("/")
|
63
73
|
|
data/lib/vore/version.rb
CHANGED