vore 0.2.5-x86_64-windows → 0.2.8-x86_64-windows

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b244d29c525e65f76e7cffc59c453f26883d3c7c5d7a7ed2248ecaf319f22515
4
- data.tar.gz: 9bd4a0446694812585e68cf8c0b750f808979f0732a136d6d15273a84e7afd4b
3
+ metadata.gz: 85a50437d0557e28ff3eeb155a8e527163aeb5f90fd98f0e5cb8e09e3d81bb6e
4
+ data.tar.gz: 61d1255a042db43b6e50bc749fccceb452080afd07e1eac04461ef4894fb8027
5
5
  SHA512:
6
- metadata.gz: 2783cd4297442c2f4aee698adf301b130873d8e38d01bbdea0f32ec59fc70cdd332cd08f4b7bf3fbfd5e796cb003bc332a9e55d199af36bbacc40cac56e746ca
7
- data.tar.gz: caea7c9488653fa4ad09eafc82dc6b1584379016a6e45256ba4bf7f83f8ddfe9ba8218144591ad62593f38b8dd6abc32e6d599932ec0a53c41f09efb3c594a7f
6
+ metadata.gz: d5c8a45adf9b4402d3600ae26728afb89b5e4d24953ad905462ad6b7c2d682a2806ed452673c486000621e5bf0373de6dd85a28ba592cb2e5eb1e86d83eb973f
7
+ data.tar.gz: 0077fcc1c2173c46be332da7727f6560a1cf8b2351491bc147b0d1111cefc82dc8bf8081e0f2370e2f7432d1905b2419e8bc168294dfef8a4387d609d512dae8
data/exe/vore-spider.exe CHANGED
Binary file
data/lib/vore/crawler.rb CHANGED
@@ -8,6 +8,8 @@ module Vore
8
8
  PLATFORM = [:cpu, :os].map { |m| Gem::Platform.local.send(m) }.join("-")
9
9
  FILE_SEPERATOR = PLATFORM.include?("windows") ? File::ALT_SEPARATOR : File::SEPARATOR
10
10
 
11
+ attr_reader :output_dir
12
+
11
13
  # Creates a crawler
12
14
  # denylist: Sets a denylist filter, allows a regexp, string or array of either to be matched.
13
15
  def initialize(denylist: /a^/, sanitization_config: Vole::Configuration::DEFAULT_SANITIZATION_CONFIG)
@@ -26,16 +28,10 @@ module Vore
26
28
  end
27
29
 
28
30
  def scrape_each_page(website, &block)
29
- output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
31
+ @output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
30
32
  Vore.logger.info("Vore started crawling #{website}, outputting to #{output_dir}")
31
33
 
32
- output = %x(#{@executable} \
33
- --user-agent #{user_agent} \
34
- --delay 3500 \
35
- --url #{website} \
36
- download \
37
- -t \
38
- #{output_dir})
34
+ output = run_command(website, @output_dir)
39
35
 
40
36
  Vore.logger.info("Vore finished crawling #{website}: #{output}")
41
37
 
@@ -51,6 +47,7 @@ module Vore
51
47
  results[:pages_visited] += 1
52
48
 
53
49
  html_file = File.read(path).force_encoding("UTF-8")
50
+ rewritten_html_file = ""
54
51
 
55
52
  if html_file.empty?
56
53
  results[:pages_unprocessed] += 1
@@ -58,7 +55,13 @@ module Vore
58
55
  next
59
56
  end
60
57
 
61
- rewritten_html_file = @selma.rewrite(html_file)
58
+ begin
59
+ rewritten_html_file = @selma.rewrite(html_file)
60
+ rescue StandardError => e
61
+ Vore.logger.warn("Error rewriting #{path}: #{e}")
62
+ results[:pages_unprocessed] += 1
63
+ next
64
+ end
62
65
 
63
66
  # drops the first 3 parts of the path, which are "tmp", "vore", and the site name
64
67
  url_path = path.split(FILE_SEPERATOR)[3..].join("/")
@@ -83,6 +86,16 @@ module Vore
83
86
  # crawl_site(site)
84
87
  # end
85
88
 
89
+ def run_command(website, output_dir)
90
+ %x(#{@executable} \
91
+ --user-agent #{user_agent} \
92
+ --delay 3500 \
93
+ --url #{website} \
94
+ download \
95
+ -t \
96
+ #{output_dir})
97
+ end
98
+
86
99
  def user_agent
87
100
  "'Mozilla/5.0 (compatible; Vore/#{Vore::VERSION}; +https://github.com/gjtorikian/vore)'"
88
101
  end
@@ -20,7 +20,6 @@ module Vole
20
20
 
21
21
  def handle_element(element)
22
22
  if element.tag_name == "pre" ||
23
- element.tag_name == "code" ||
24
23
  element.tag_name == "form" ||
25
24
  element.tag_name == "style" ||
26
25
  element.tag_name == "noscript" ||
data/lib/vore/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vore
4
- VERSION = "0.2.5"
4
+ VERSION = "0.2.8"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.8
5
5
  platform: x86_64-windows
6
6
  authors:
7
7
  - Garen J. Torikian