vore 0.2.0-arm64-linux → 0.2.1-arm64-linux
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/vore/crawler.rb +3 -3
- data/lib/vore/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46df38a7d14df27f53723a4ce83c9a3d34d9a3f516a32a62b1d2150665c4b622
|
4
|
+
data.tar.gz: 96e6ba5043de693565bd96815391f91f043390278a786abe951a662cc8f1c064
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dcc8804b9f9cf57ecc7c87bfabec8416a8e12fc4987b713e2f3b16e6e2201a82b2be036590d37a9a6c4c8c58af9e6d52d9ed14efa4e84fd0472f7b0465290376
|
7
|
+
data.tar.gz: 5b2c0166e960100ca5f56d6df1c3f9223b9edcaf33a296338d640fd2bacdc4aab89eefcaa7799b46b97a2181d8e7f2cad9d0557e12ad54136f3efb9b7dfe0aa4
|
data/lib/vore/crawler.rb
CHANGED
@@ -17,7 +17,7 @@ module Vore
|
|
17
17
|
@selma = Selma::Rewriter.new(sanitizer: Selma::Sanitizer.new(sanitization_config), handlers: [@content_extractor])
|
18
18
|
ext = PLATFORM.include?("windows") ? ".exe" : ""
|
19
19
|
@executable = File.expand_path([__FILE__, "..", "..", "..", "exe", "vore-spider#{ext}"].join(FILE_SEPERATOR))
|
20
|
-
@
|
20
|
+
@parent_output_dir = "tmp/vore"
|
21
21
|
|
22
22
|
return if File.exist?(@executable)
|
23
23
|
|
@@ -26,7 +26,7 @@ module Vore
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def scrape_each_page(website, &block)
|
29
|
-
output_dir = "#{@
|
29
|
+
output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
|
30
30
|
Vore.logger.info("Vore started crawling #{website}, outputting to #{output_dir}")
|
31
31
|
|
32
32
|
output = %x(#{@executable} \
|
@@ -38,7 +38,7 @@ module Vore
|
|
38
38
|
|
39
39
|
Vore.logger.info("Vore finished crawling #{website}: #{output}")
|
40
40
|
|
41
|
-
Dir.glob("
|
41
|
+
Dir.glob(File.join(output_dir, "**", "*")).each do |path|
|
42
42
|
next unless File.file?(path)
|
43
43
|
|
44
44
|
html_file = File.read(path).force_encoding("UTF-8")
|
data/lib/vore/version.rb
CHANGED