vore 0.2.6-arm64-darwin → 0.2.8-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 402d817b4979ae3cb7caee99653fc2ddfccf726b53f49c76dc078d54a9868af3
4
- data.tar.gz: 1768208e84f98fbbcb5e8bb683528f6f476c928d79c1e006cb1b6a8bf521cae7
3
+ metadata.gz: cea3bfc2d1ad6b14200a35b52eaa739bb14b2f6d5ace761be3d77a53366f19b2
4
+ data.tar.gz: '03861e5e9ccdbca488f9397bc59f480f48e6d62e03b4cf40ee652e3cc57745da'
5
5
  SHA512:
6
- metadata.gz: '0091d5f55c923e91abfa57b01e7857072196c85e1053568b5799c715c6d188a27075a82c9393e9fdadade34ff02832c98780dcf9e1f70970f45db29a77fbc733'
7
- data.tar.gz: 5915151dbdd4d055f5cf6305dada2a0a82d817a8563ddffab52582f6c580903a2f29269b81c3591c407775ab522d66e55c5a901d3afb4640aa0e959c56f88e2c
6
+ metadata.gz: b7c448fce4a1c6b26b6bff99fbe91053588f56b86b077567913902b4c3405d805d56e41ae430d302fbe2c8d834b50514a945f7361b8996c992d33355cdd7269a
7
+ data.tar.gz: aa7a8facb68f637b2642b4ee1fbb7437b4cf78ede8f31c8505e9d8141f45bdb4ef689f80ca498af4eb681cd6d5c6c3cbbdba8059f8890ad9fcb37eac5a480095
data/exe/vore-spider CHANGED
Binary file
data/lib/vore/crawler.rb CHANGED
@@ -8,6 +8,8 @@ module Vore
8
8
  PLATFORM = [:cpu, :os].map { |m| Gem::Platform.local.send(m) }.join("-")
9
9
  FILE_SEPERATOR = PLATFORM.include?("windows") ? File::ALT_SEPARATOR : File::SEPARATOR
10
10
 
11
+ attr_reader :output_dir
12
+
11
13
  # Creates a crawler
12
14
  # denylist: Sets a denylist filter, allows a regexp, string or array of either to be matched.
13
15
  def initialize(denylist: /a^/, sanitization_config: Vole::Configuration::DEFAULT_SANITIZATION_CONFIG)
@@ -26,16 +28,10 @@ module Vore
26
28
  end
27
29
 
28
30
  def scrape_each_page(website, &block)
29
- output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
31
+ @output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
30
32
  Vore.logger.info("Vore started crawling #{website}, outputting to #{output_dir}")
31
33
 
32
- output = %x(#{@executable} \
33
- --user-agent #{user_agent} \
34
- --delay 3500 \
35
- --url #{website} \
36
- download \
37
- -t \
38
- #{output_dir})
34
+ output = run_command(website, @output_dir)
39
35
 
40
36
  Vore.logger.info("Vore finished crawling #{website}: #{output}")
41
37
 
@@ -54,7 +50,6 @@ module Vore
54
50
  rewritten_html_file = ""
55
51
 
56
52
  if html_file.empty?
57
- Vore.logger.warn("HTML file empty: #{path}")
58
53
  results[:pages_unprocessed] += 1
59
54
  results[:unprocessed_pages] << path
60
55
  next
@@ -91,6 +86,16 @@ module Vore
91
86
  # crawl_site(site)
92
87
  # end
93
88
 
89
+ def run_command(website, output_dir)
90
+ %x(#{@executable} \
91
+ --user-agent #{user_agent} \
92
+ --delay 3500 \
93
+ --url #{website} \
94
+ download \
95
+ -t \
96
+ #{output_dir})
97
+ end
98
+
94
99
  def user_agent
95
100
  "'Mozilla/5.0 (compatible; Vore/#{Vore::VERSION}; +https://github.com/gjtorikian/vore)'"
96
101
  end
@@ -20,7 +20,6 @@ module Vole
20
20
 
21
21
  def handle_element(element)
22
22
  if element.tag_name == "pre" ||
23
- element.tag_name == "code" ||
24
23
  element.tag_name == "form" ||
25
24
  element.tag_name == "style" ||
26
25
  element.tag_name == "noscript" ||
data/lib/vore/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vore
4
- VERSION = "0.2.6"
4
+ VERSION = "0.2.8"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.8
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Garen J. Torikian