vore 0.2.0-x86_64-windows → 0.2.1-x86_64-windows
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/vore-spider.exe +0 -0
- data/lib/vore/crawler.rb +3 -3
- data/lib/vore/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3c6f05422a96c467db934bcc38dfe52e031a8a4515780c55ebb1f78993825e1b
|
4
|
+
data.tar.gz: d26f5e75aa905b9ab9e61dd93dedff583739ef1e509a0e8ad999b8f69872d494
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9cf15323bc49343f834eca443d9231e709e1e9690f9fa0fc89820508e8c356556049b1ff6292fe6b4578fd8e4e91f9e38d05f158dd9d52f8196159906019bed
|
7
|
+
data.tar.gz: 02ac5c1fed91bc8b4136fbe119eb4c195a6facac16b431224785fc6ad812ffc8180b40dbbfee11cdba4213e99316d9d7de7dffff3dba95daccde734348a0c8e6
|
data/exe/vore-spider.exe
CHANGED
Binary file
|
data/lib/vore/crawler.rb
CHANGED
@@ -17,7 +17,7 @@ module Vore
|
|
17
17
|
@selma = Selma::Rewriter.new(sanitizer: Selma::Sanitizer.new(sanitization_config), handlers: [@content_extractor])
|
18
18
|
ext = PLATFORM.include?("windows") ? ".exe" : ""
|
19
19
|
@executable = File.expand_path([__FILE__, "..", "..", "..", "exe", "vore-spider#{ext}"].join(FILE_SEPERATOR))
|
20
|
-
@
|
20
|
+
@parent_output_dir = "tmp/vore"
|
21
21
|
|
22
22
|
return if File.exist?(@executable)
|
23
23
|
|
@@ -26,7 +26,7 @@ module Vore
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def scrape_each_page(website, &block)
|
29
|
-
output_dir = "#{@
|
29
|
+
output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
|
30
30
|
Vore.logger.info("Vore started crawling #{website}, outputting to #{output_dir}")
|
31
31
|
|
32
32
|
output = %x(#{@executable} \
|
@@ -38,7 +38,7 @@ module Vore
|
|
38
38
|
|
39
39
|
Vore.logger.info("Vore finished crawling #{website}: #{output}")
|
40
40
|
|
41
|
-
Dir.glob("
|
41
|
+
Dir.glob(File.join(output_dir, "**", "*")).each do |path|
|
42
42
|
next unless File.file?(path)
|
43
43
|
|
44
44
|
html_file = File.read(path).force_encoding("UTF-8")
|
data/lib/vore/version.rb
CHANGED