vore 0.2.1-arm64-darwin → 0.2.2-arm64-darwin

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 119b1cdc06e20e708ceda86505a02361c19bf0de881300222e164bac1bbe6cc6
4
- data.tar.gz: 75febe4d2f4cd048722d94a2f265b6239db67f08282a26de70a5c85bc4735933
3
+ metadata.gz: 61ddb28c73e9dcda499667e7440f3a185cefb14195304f74a84e9920e986535c
4
+ data.tar.gz: c1904fda0864dba3f2aa444430d66e7485b41b39d19592a20bd59a067125221e
5
5
  SHA512:
6
- metadata.gz: 364c154a766444e457769925a2c0a4fe890c53fbd9a7726d9896afac6389807806a6c36a1a3e3d27ae5e7c8de837c0081dab4b8397e9f23a8a7655607f5c62d7
7
- data.tar.gz: c32f3ba74037034ff6f957cd397a61ad6c8328a2f282196b45e69584c5693b7647e4dd1cad105aa7b20475106ebe9933f80b81585061f42338b073005b7fe217
6
+ metadata.gz: 5015b25a45cb9bd7c36ad85319416f314bfe63f16daf3bd910ef082cba0eef2885656bc43372e2b4c5d02570931892063511b1a327f967b8045b0e0c306c995d
7
+ data.tar.gz: 7ee0bd863322612f060491237bae1253d2cc57682dac9d4a58a25eaf198ff8145f7e53101b5715c42a41e8cb8c9c0c26ed4c8f0d15b21fd39052bf19b826a150
data/exe/vore-spider CHANGED
Binary file
data/lib/vore/crawler.rb CHANGED
@@ -31,6 +31,7 @@ module Vore
31
31
 
32
32
  output = %x(#{@executable} \
33
33
  --user-agent #{user_agent} \
34
+ --delay 3000
34
35
  --url #{website} \
35
36
  download \
36
37
  -t \
@@ -38,12 +39,25 @@ module Vore
38
39
 
39
40
  Vore.logger.info("Vore finished crawling #{website}: #{output}")
40
41
 
42
+ results = {
43
+ pages_visited: 0,
44
+ pages_unprocessed: 0,
45
+ unprocessed_pages: [],
46
+ }
47
+
41
48
  Dir.glob(File.join(output_dir, "**", "*")).each do |path|
42
49
  next unless File.file?(path)
43
50
 
44
51
  html_file = File.read(path).force_encoding("UTF-8")
45
52
  rewritten_html_file = @selma.rewrite(html_file)
46
53
 
54
+ results[:pages_visited] += 1
55
+ if rewritten_html_file.empty?
56
+ results[:pages_unprocessed] += 1
57
+ results[:unprocessed_pages] << path
58
+ next
59
+ end
60
+
47
61
  # drops the first 3 parts of the path, which are "tmp", "vore", and the site name
48
62
  url_path = path.split(FILE_SEPERATOR)[3..].join("/")
49
63
 
@@ -58,6 +72,8 @@ module Vore
58
72
  ensure
59
73
  File.delete(path) if File.file?(path)
60
74
  end
75
+
76
+ results
61
77
  end
62
78
 
63
79
  # def crawl(site, block)
@@ -19,7 +19,13 @@ module Vore
19
19
  end
20
20
 
21
21
  def handle_element(element)
22
- if element.tag_name == "pre" || element.tag_name == "code" || element.tag_name == "script" || element.tag_name == "form"
22
+ if element.tag_name == "pre" ||
23
+ element.tag_name == "code" ||
24
+ element.tag_name == "form" ||
25
+ element.tag_name == "style" ||
26
+ element.tag_name == "noscript" ||
27
+ element.tag_name == "script" ||
28
+ element.tag_name == "svg"
23
29
  element.remove
24
30
  elsif element.tag_name == "title"
25
31
  @within_title = true
data/lib/vore/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vore
4
- VERSION = "0.2.1"
4
+ VERSION = "0.2.2"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: arm64-darwin
6
6
  authors:
7
7
  - Garen J. Torikian