vore 0.2.8-arm64-darwin → 0.3.0-arm64-darwin
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/vore-spider +0 -0
- data/lib/vore/configuration.rb +4 -0
- data/lib/vore/crawler.rb +6 -5
- data/lib/vore/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54a3f9525133d20d29eadd67c83ef0c996c3e85ae6843b3ea239e3cff0d9bea3
|
4
|
+
data.tar.gz: 065af90bf1234459fe430a2c49882d988e4b1367786dc506a7cf571804812339
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 540ceeac482a7b9274161a5b64c7d5d0fd570dd3756934ae8a6dcaa760869b81c3f56d89b7c959f7a18c66fffe94c9b215efff8e648d5580aaeaa823e10405e3
|
7
|
+
data.tar.gz: f1d56bc655ac4e42e720837d00df756d497f55ceca844edc4fd7ceb887736458afb02532b90a1fe0f95a7e8b1fe3e551edd3ea196fa8d995c3bd3a5f42db7f32
|
data/exe/vore-spider
CHANGED
Binary file
|
data/lib/vore/configuration.rb
CHANGED
data/lib/vore/crawler.rb
CHANGED
@@ -12,7 +12,7 @@ module Vore
|
|
12
12
|
|
13
13
|
# Creates a crawler
|
14
14
|
# denylist: Sets a denylist filter, allows a regexp, string or array of either to be matched.
|
15
|
-
def initialize(denylist: /a^/, sanitization_config: Vole::Configuration::DEFAULT_SANITIZATION_CONFIG)
|
15
|
+
def initialize(denylist: /a^/, sanitization_config: Vole::Configuration::DEFAULT_SANITIZATION_CONFIG, options: Vole::Configuration::DEFAULT_OPTIONS)
|
16
16
|
@denylist_regexp = Regexp.union(denylist)
|
17
17
|
|
18
18
|
@content_extractor = Vole::Handlers::ContentExtractor.new
|
@@ -20,6 +20,7 @@ module Vore
|
|
20
20
|
ext = PLATFORM.include?("windows") ? ".exe" : ""
|
21
21
|
@executable = File.expand_path([__FILE__, "..", "..", "..", "exe", "vore-spider#{ext}"].join(FILE_SEPERATOR))
|
22
22
|
@parent_output_dir = "tmp/vore"
|
23
|
+
@options = options
|
23
24
|
|
24
25
|
return if File.exist?(@executable)
|
25
26
|
|
@@ -31,7 +32,7 @@ module Vore
|
|
31
32
|
@output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
|
32
33
|
Vore.logger.info("Vore started crawling #{website}, outputting to #{output_dir}")
|
33
34
|
|
34
|
-
output = run_command(website, @
|
35
|
+
output = run_command(website, delay: @options[:delay])
|
35
36
|
|
36
37
|
Vore.logger.info("Vore finished crawling #{website}: #{output}")
|
37
38
|
|
@@ -86,14 +87,14 @@ module Vore
|
|
86
87
|
# crawl_site(site)
|
87
88
|
# end
|
88
89
|
|
89
|
-
def run_command(website,
|
90
|
+
def run_command(website, delay: 3500)
|
90
91
|
%x(#{@executable} \
|
91
92
|
--user-agent #{user_agent} \
|
92
|
-
--delay
|
93
|
+
--delay #{delay} \
|
93
94
|
--url #{website} \
|
94
95
|
download \
|
95
96
|
-t \
|
96
|
-
#{output_dir})
|
97
|
+
#{@output_dir})
|
97
98
|
end
|
98
99
|
|
99
100
|
def user_agent
|
data/lib/vore/version.rb
CHANGED