vore 0.2.1-arm64-linux → 0.2.2-arm64-linux

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 46df38a7d14df27f53723a4ce83c9a3d34d9a3f516a32a62b1d2150665c4b622
4
- data.tar.gz: 96e6ba5043de693565bd96815391f91f043390278a786abe951a662cc8f1c064
3
+ metadata.gz: a47f073e872dec14b08f1d243b67511210b8695735bfc79e0ac66b6be59ad982
4
+ data.tar.gz: e6a3f95fd41212bdc9401a10b98a5b39f7414c3a3dbb2ded96ea9da37d06f523
5
5
  SHA512:
6
- metadata.gz: dcc8804b9f9cf57ecc7c87bfabec8416a8e12fc4987b713e2f3b16e6e2201a82b2be036590d37a9a6c4c8c58af9e6d52d9ed14efa4e84fd0472f7b0465290376
7
- data.tar.gz: 5b2c0166e960100ca5f56d6df1c3f9223b9edcaf33a296338d640fd2bacdc4aab89eefcaa7799b46b97a2181d8e7f2cad9d0557e12ad54136f3efb9b7dfe0aa4
6
+ metadata.gz: ad07280c37d0f56bac4d597c7314c924ba56aa7262c70ab5026e6474a6d809f5b115f0f3cef6cfdc744589069daf06796994024211202705ab16fe224118fbbe
7
+ data.tar.gz: 65bd39b3786caa8216ebf04b246bcd164b66845bd2cd2d1f5ccac72428811bd1a45baee910f81a299dcb2b39621e4f97ea42b4f9dae0fcc22c7355fe96743c86
data/lib/vore/crawler.rb CHANGED
@@ -31,6 +31,7 @@ module Vore
31
31
 
32
32
  output = %x(#{@executable} \
33
33
  --user-agent #{user_agent} \
34
+ --delay 3000
34
35
  --url #{website} \
35
36
  download \
36
37
  -t \
@@ -38,12 +39,25 @@ module Vore
38
39
 
39
40
  Vore.logger.info("Vore finished crawling #{website}: #{output}")
40
41
 
42
+ results = {
43
+ pages_visited: 0,
44
+ pages_unprocessed: 0,
45
+ unprocessed_pages: [],
46
+ }
47
+
41
48
  Dir.glob(File.join(output_dir, "**", "*")).each do |path|
42
49
  next unless File.file?(path)
43
50
 
44
51
  html_file = File.read(path).force_encoding("UTF-8")
45
52
  rewritten_html_file = @selma.rewrite(html_file)
46
53
 
54
+ results[:pages_visited] += 1
55
+ if rewritten_html_file.empty?
56
+ results[:pages_unprocessed] += 1
57
+ results[:unprocessed_pages] << path
58
+ next
59
+ end
60
+
47
61
  # drops the first 3 parts of the path, which are "tmp", "vore", and the site name
48
62
  url_path = path.split(FILE_SEPERATOR)[3..].join("/")
49
63
 
@@ -58,6 +72,8 @@ module Vore
58
72
  ensure
59
73
  File.delete(path) if File.file?(path)
60
74
  end
75
+
76
+ results
61
77
  end
62
78
 
63
79
  # def crawl(site, block)
@@ -19,7 +19,13 @@ module Vore
19
19
  end
20
20
 
21
21
  def handle_element(element)
22
- if element.tag_name == "pre" || element.tag_name == "code" || element.tag_name == "script" || element.tag_name == "form"
22
+ if element.tag_name == "pre" ||
23
+ element.tag_name == "code" ||
24
+ element.tag_name == "form" ||
25
+ element.tag_name == "style" ||
26
+ element.tag_name == "noscript" ||
27
+ element.tag_name == "script" ||
28
+ element.tag_name == "svg"
23
29
  element.remove
24
30
  elsif element.tag_name == "title"
25
31
  @within_title = true
data/lib/vore/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vore
4
- VERSION = "0.2.1"
4
+ VERSION = "0.2.2"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: arm64-linux
6
6
  authors:
7
7
  - Garen J. Torikian