vore 0.2.6-arm64-linux → 0.2.8-arm64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 72fc70a334c49c0cd3d49fa18a0754e593024555276c128503f6d2897c3bb25a
4
- data.tar.gz: 47d4b811f806876617702d1d599848eaa3948f4ebae715a686d05919a37a9f1c
3
+ metadata.gz: 0f77381da3968a295e7b13edb86877a2d94646c9f9730bcd2e891c1bd885956a
4
+ data.tar.gz: b392e9cf87a1bf80ec9b03603c6984b55c76a6da569eba836af48c090f865d5b
5
5
  SHA512:
6
- metadata.gz: 5b6d5a89270fdf1b4630cc09ea1e7e0e9622b3af61a5965ba48718a2b6b2706d5984f59ffa510a8b192910604e7a1ff79f3b373466f3181d3f69ebd714bad3e3
7
- data.tar.gz: 9906bcb9a83a4ea7c12de3be523fa3b534a6e0faeb2bf4dcc586a3e097c4efc5fd511d091641a902b4b050be6cff65e25809fb453298b587074aeda8ae3f932d
6
+ metadata.gz: abb134c68955daa0ef225183f2bf1563a449395cc93a4ff6bf7f8161d1fcde9e315327d3fa50bfe2081f6ab301cc869c0fd231774c5794ad34043c6093e797c1
7
+ data.tar.gz: 858cdee603239b6e5c7cdbbf3538d5566ca726d08e986376d527d549d8f4bf4de8bccb325758b5449baae37575b7e954bd78c02ab816875589c5273c66b11ea1
data/lib/vore/crawler.rb CHANGED
@@ -8,6 +8,8 @@ module Vore
8
8
  PLATFORM = [:cpu, :os].map { |m| Gem::Platform.local.send(m) }.join("-")
9
9
  FILE_SEPERATOR = PLATFORM.include?("windows") ? File::ALT_SEPARATOR : File::SEPARATOR
10
10
 
11
+ attr_reader :output_dir
12
+
11
13
  # Creates a crawler
12
14
  # denylist: Sets a denylist filter, allows a regexp, string or array of either to be matched.
13
15
  def initialize(denylist: /a^/, sanitization_config: Vole::Configuration::DEFAULT_SANITIZATION_CONFIG)
@@ -26,16 +28,10 @@ module Vore
26
28
  end
27
29
 
28
30
  def scrape_each_page(website, &block)
29
- output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
31
+ @output_dir = "#{@parent_output_dir}/#{website.gsub(/[^a-zA-Z0-9]/, "_").squeeze("_")}"
30
32
  Vore.logger.info("Vore started crawling #{website}, outputting to #{output_dir}")
31
33
 
32
- output = %x(#{@executable} \
33
- --user-agent #{user_agent} \
34
- --delay 3500 \
35
- --url #{website} \
36
- download \
37
- -t \
38
- #{output_dir})
34
+ output = run_command(website, @output_dir)
39
35
 
40
36
  Vore.logger.info("Vore finished crawling #{website}: #{output}")
41
37
 
@@ -54,7 +50,6 @@ module Vore
54
50
  rewritten_html_file = ""
55
51
 
56
52
  if html_file.empty?
57
- Vore.logger.warn("HTML file empty: #{path}")
58
53
  results[:pages_unprocessed] += 1
59
54
  results[:unprocessed_pages] << path
60
55
  next
@@ -91,6 +86,16 @@ module Vore
91
86
  # crawl_site(site)
92
87
  # end
93
88
 
89
+ def run_command(website, output_dir)
90
+ %x(#{@executable} \
91
+ --user-agent #{user_agent} \
92
+ --delay 3500 \
93
+ --url #{website} \
94
+ download \
95
+ -t \
96
+ #{output_dir})
97
+ end
98
+
94
99
  def user_agent
95
100
  "'Mozilla/5.0 (compatible; Vore/#{Vore::VERSION}; +https://github.com/gjtorikian/vore)'"
96
101
  end
@@ -20,7 +20,6 @@ module Vole
20
20
 
21
21
  def handle_element(element)
22
22
  if element.tag_name == "pre" ||
23
- element.tag_name == "code" ||
24
23
  element.tag_name == "form" ||
25
24
  element.tag_name == "style" ||
26
25
  element.tag_name == "noscript" ||
data/lib/vore/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vore
4
- VERSION = "0.2.6"
4
+ VERSION = "0.2.8"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.8
5
5
  platform: arm64-linux
6
6
  authors:
7
7
  - Garen J. Torikian