vore 0.2.4-x86_64-windows → 0.2.8-x86_64-windows

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 571c472b2c7e94a786883d6005c5a913e87581762f7ee90bdd915ec366cda335
4
- data.tar.gz: 55cf52342b9b3b335469b7ac527f0c9bd9c13f86858591b015920106b5a7b70c
3
+ metadata.gz: 85a50437d0557e28ff3eeb155a8e527163aeb5f90fd98f0e5cb8e09e3d81bb6e
4
+ data.tar.gz: 61d1255a042db43b6e50bc749fccceb452080afd07e1eac04461ef4894fb8027
5
5
  SHA512:
6
- metadata.gz: 3d8adc4b1cad88301ca10d57b654781ff4c7a66a75fb5e8cec08aa21469de51568162c22615001f60881a3a6b0efe72606ba1ca8d4846e77b2fd1527a2906eb2
7
- data.tar.gz: 6db4625ac4e0d7c586c1e12b5012494eab0fb74a75a441cd38f49873e09b8d2d6b9b532d227fdd2807b7cf9f6edb153c3a4798e737b4913cd51aa3d855db4b0d
6
+ metadata.gz: d5c8a45adf9b4402d3600ae26728afb89b5e4d24953ad905462ad6b7c2d682a2806ed452673c486000621e5bf0373de6dd85a28ba592cb2e5eb1e86d83eb973f
7
+ data.tar.gz: 0077fcc1c2173c46be332da7727f6560a1cf8b2351491bc147b0d1111cefc82dc8bf8081e0f2370e2f7432d1905b2419e8bc168294dfef8a4387d609d512dae8
data/exe/vore-spider.exe CHANGED
Binary file
data/lib/vore/crawler.rb CHANGED
@@ -8,6 +8,8 @@ module Vore
8
8
  PLATFORM = [:cpu, :os].map { |m| Gem::Platform.local.send(m) }.join("-")
9
9
  FILE_SEPERATOR = PLATFORM.include?("windows") ? File::ALT_SEPARATOR : File::SEPARATOR
10
10
 
11
+ attr_reader :output_dir
12
+
11
13
  # Creates a crawler
12
14
  # denylist: Sets a denylist filter, allows a regexp, string or array of either to be matched.
13
15
  def initialize(denylist: /a^/, sanitization_config: Vole::Configuration::DEFAULT_SANITIZATION_CONFIG)
@@ -26,16 +28,10 @@ module Vore
26
28
  end
27
29
 
28
30
  def scrape_each_page(website, &block)
29
- output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
31
+ @output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
30
32
  Vore.logger.info("Vore started crawling #{website}, outputting to #{output_dir}")
31
33
 
32
- output = %x(#{@executable} \
33
- --user-agent #{user_agent} \
34
- --delay 3000 \
35
- --url #{website} \
36
- download \
37
- -t \
38
- #{output_dir})
34
+ output = run_command(website, @output_dir)
39
35
 
40
36
  Vore.logger.info("Vore finished crawling #{website}: #{output}")
41
37
 
@@ -48,16 +44,25 @@ module Vore
48
44
  Dir.glob(File.join(output_dir, "**", "*")).each do |path|
49
45
  next unless File.file?(path)
50
46
 
47
+ results[:pages_visited] += 1
48
+
51
49
  html_file = File.read(path).force_encoding("UTF-8")
52
- rewritten_html_file = @selma.rewrite(html_file)
50
+ rewritten_html_file = ""
53
51
 
54
- results[:pages_visited] += 1
55
- if rewritten_html_file.empty?
52
+ if html_file.empty?
56
53
  results[:pages_unprocessed] += 1
57
54
  results[:unprocessed_pages] << path
58
55
  next
59
56
  end
60
57
 
58
+ begin
59
+ rewritten_html_file = @selma.rewrite(html_file)
60
+ rescue StandardError => e
61
+ Vore.logger.warn("Error rewriting #{path}: #{e}")
62
+ results[:pages_unprocessed] += 1
63
+ next
64
+ end
65
+
61
66
  # drops the first 3 parts of the path, which are "tmp", "vore", and the site name
62
67
  url_path = path.split(FILE_SEPERATOR)[3..].join("/")
63
68
 
@@ -81,6 +86,16 @@ module Vore
81
86
  # crawl_site(site)
82
87
  # end
83
88
 
89
+ def run_command(website, output_dir)
90
+ %x(#{@executable} \
91
+ --user-agent #{user_agent} \
92
+ --delay 3500 \
93
+ --url #{website} \
94
+ download \
95
+ -t \
96
+ #{output_dir})
97
+ end
98
+
84
99
  def user_agent
85
100
  "'Mozilla/5.0 (compatible; Vore/#{Vore::VERSION}; +https://github.com/gjtorikian/vore)'"
86
101
  end
@@ -20,7 +20,6 @@ module Vole
20
20
 
21
21
  def handle_element(element)
22
22
  if element.tag_name == "pre" ||
23
- element.tag_name == "code" ||
24
23
  element.tag_name == "form" ||
25
24
  element.tag_name == "style" ||
26
25
  element.tag_name == "noscript" ||
data/lib/vore/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vore
4
- VERSION = "0.2.4"
4
+ VERSION = "0.2.8"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.8
5
5
  platform: x86_64-windows
6
6
  authors:
7
7
  - Garen J. Torikian