vore 0.2.0-arm64-darwin → 0.2.1-arm64-darwin
Sign up for free protection for your applications and access to all features.
- checksums.yaml +4 -4
- data/exe/vore-spider +0 -0
- data/lib/vore/crawler.rb +3 -3
- data/lib/vore/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 119b1cdc06e20e708ceda86505a02361c19bf0de881300222e164bac1bbe6cc6
|
4
|
+
data.tar.gz: 75febe4d2f4cd048722d94a2f265b6239db67f08282a26de70a5c85bc4735933
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 364c154a766444e457769925a2c0a4fe890c53fbd9a7726d9896afac6389807806a6c36a1a3e3d27ae5e7c8de837c0081dab4b8397e9f23a8a7655607f5c62d7
|
7
|
+
data.tar.gz: c32f3ba74037034ff6f957cd397a61ad6c8328a2f282196b45e69584c5693b7647e4dd1cad105aa7b20475106ebe9933f80b81585061f42338b073005b7fe217
|
data/exe/vore-spider
CHANGED
Binary file
|
data/lib/vore/crawler.rb
CHANGED
@@ -17,7 +17,7 @@ module Vore
|
|
17
17
|
@selma = Selma::Rewriter.new(sanitizer: Selma::Sanitizer.new(sanitization_config), handlers: [@content_extractor])
|
18
18
|
ext = PLATFORM.include?("windows") ? ".exe" : ""
|
19
19
|
@executable = File.expand_path([__FILE__, "..", "..", "..", "exe", "vore-spider#{ext}"].join(FILE_SEPERATOR))
|
20
|
-
@
|
20
|
+
@parent_output_dir = "tmp/vore"
|
21
21
|
|
22
22
|
return if File.exist?(@executable)
|
23
23
|
|
@@ -26,7 +26,7 @@ module Vore
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def scrape_each_page(website, &block)
|
29
|
-
output_dir = "#{@
|
29
|
+
output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
|
30
30
|
Vore.logger.info("Vore started crawling #{website}, outputting to #{output_dir}")
|
31
31
|
|
32
32
|
output = %x(#{@executable} \
|
@@ -38,7 +38,7 @@ module Vore
|
|
38
38
|
|
39
39
|
Vore.logger.info("Vore finished crawling #{website}: #{output}")
|
40
40
|
|
41
|
-
Dir.glob("
|
41
|
+
Dir.glob(File.join(output_dir, "**", "*")).each do |path|
|
42
42
|
next unless File.file?(path)
|
43
43
|
|
44
44
|
html_file = File.read(path).force_encoding("UTF-8")
|
data/lib/vore/version.rb
CHANGED