vore 0.2.0-x86_64-darwin → 0.2.1-x86_64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/vore-spider +0 -0
- data/lib/vore/crawler.rb +3 -3
- data/lib/vore/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31497cb9cec566256acca79e43bfb5b6fe8791bea446bba60ca895f2be3ab91b
|
4
|
+
data.tar.gz: 2252352ba0f823215117c143d0608431bfd6e0037f9b122f6711c191dc7f99c2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0f51ccfc30cb95ff87b6275c033b343ca7c07994f9937fad18a670d8973dd2c0d2db8c40f5849d2e32a372c1e755e97b3489b46532bd9c0317e7bfdc88786e0
|
7
|
+
data.tar.gz: 9517a87119025f30bdb105fa89440990ec260b32db10e497a6775271fec547358a4552edf6033508d27225a12427bd61272b13e0585824802c4490d7187173de
|
data/exe/vore-spider
CHANGED
Binary file
|
data/lib/vore/crawler.rb
CHANGED
@@ -17,7 +17,7 @@ module Vore
|
|
17
17
|
@selma = Selma::Rewriter.new(sanitizer: Selma::Sanitizer.new(sanitization_config), handlers: [@content_extractor])
|
18
18
|
ext = PLATFORM.include?("windows") ? ".exe" : ""
|
19
19
|
@executable = File.expand_path([__FILE__, "..", "..", "..", "exe", "vore-spider#{ext}"].join(FILE_SEPERATOR))
|
20
|
-
@
|
20
|
+
@parent_output_dir = "tmp/vore"
|
21
21
|
|
22
22
|
return if File.exist?(@executable)
|
23
23
|
|
@@ -26,7 +26,7 @@ module Vore
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def scrape_each_page(website, &block)
|
29
|
-
output_dir = "#{@
|
29
|
+
output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
|
30
30
|
Vore.logger.info("Vore started crawling #{website}, outputting to #{output_dir}")
|
31
31
|
|
32
32
|
output = %x(#{@executable} \
|
@@ -38,7 +38,7 @@ module Vore
|
|
38
38
|
|
39
39
|
Vore.logger.info("Vore finished crawling #{website}: #{output}")
|
40
40
|
|
41
|
-
Dir.glob("
|
41
|
+
Dir.glob(File.join(output_dir, "**", "*")).each do |path|
|
42
42
|
next unless File.file?(path)
|
43
43
|
|
44
44
|
html_file = File.read(path).force_encoding("UTF-8")
|
data/lib/vore/version.rb
CHANGED