vore 0.2.1-arm64-linux → 0.2.3-arm64-linux

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 46df38a7d14df27f53723a4ce83c9a3d34d9a3f516a32a62b1d2150665c4b622
4
- data.tar.gz: 96e6ba5043de693565bd96815391f91f043390278a786abe951a662cc8f1c064
3
+ metadata.gz: e5b9fc7852a9bf9c1271f8b4ecba0a7144c2de434828ba5bd23c4ac27d7a12ad
4
+ data.tar.gz: 4aa0500aada9cb3ba7ec3a9f71872f97ec48203f9eb3f04648a6999110fb3d45
5
5
  SHA512:
6
- metadata.gz: dcc8804b9f9cf57ecc7c87bfabec8416a8e12fc4987b713e2f3b16e6e2201a82b2be036590d37a9a6c4c8c58af9e6d52d9ed14efa4e84fd0472f7b0465290376
7
- data.tar.gz: 5b2c0166e960100ca5f56d6df1c3f9223b9edcaf33a296338d640fd2bacdc4aab89eefcaa7799b46b97a2181d8e7f2cad9d0557e12ad54136f3efb9b7dfe0aa4
6
+ metadata.gz: 0faeb9c79b755d507c2ff2eac02dd3fb83a2c1482a97a2b2742be1d84ce989993615c3cf4172f48566cb2871d61532d09ce9849420779e4c06308806a50d6a37
7
+ data.tar.gz: cee36ddceb6c2baa131f8d7ff6da0a9f4f667f125f349a4fec86bb7ddfe3f65919bff0a9ca737728bad39165c05a9136af54b0267acf2ce83f39e68671de2dd1
data/lib/vore/crawler.rb CHANGED
@@ -31,6 +31,7 @@ module Vore
31
31
 
32
32
  output = %x(#{@executable} \
33
33
  --user-agent #{user_agent} \
34
+ --delay 3000 \
34
35
  --url #{website} \
35
36
  download \
36
37
  -t \
@@ -38,12 +39,25 @@ module Vore
38
39
 
39
40
  Vore.logger.info("Vore finished crawling #{website}: #{output}")
40
41
 
42
+ results = {
43
+ pages_visited: 0,
44
+ pages_unprocessed: 0,
45
+ unprocessed_pages: [],
46
+ }
47
+
41
48
  Dir.glob(File.join(output_dir, "**", "*")).each do |path|
42
49
  next unless File.file?(path)
43
50
 
44
51
  html_file = File.read(path).force_encoding("UTF-8")
45
52
  rewritten_html_file = @selma.rewrite(html_file)
46
53
 
54
+ results[:pages_visited] += 1
55
+ if rewritten_html_file.empty?
56
+ results[:pages_unprocessed] += 1
57
+ results[:unprocessed_pages] << path
58
+ next
59
+ end
60
+
47
61
  # drops the first 3 parts of the path, which are "tmp", "vore", and the site name
48
62
  url_path = path.split(FILE_SEPERATOR)[3..].join("/")
49
63
 
@@ -58,6 +72,8 @@ module Vore
58
72
  ensure
59
73
  File.delete(path) if File.file?(path)
60
74
  end
75
+
76
+ results
61
77
  end
62
78
 
63
79
  # def crawl(site, block)
@@ -19,7 +19,13 @@ module Vole
19
19
  end
20
20
 
21
21
  def handle_element(element)
22
- if element.tag_name == "pre" || element.tag_name == "code" || element.tag_name == "script" || element.tag_name == "form"
22
+ if element.tag_name == "pre" ||
23
+ element.tag_name == "code" ||
24
+ element.tag_name == "form" ||
25
+ element.tag_name == "style" ||
26
+ element.tag_name == "noscript" ||
27
+ element.tag_name == "script" ||
28
+ element.tag_name == "svg"
23
29
  element.remove
24
30
  elsif element.tag_name == "title"
25
31
  @within_title = true
data/lib/vore/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vore
4
- VERSION = "0.2.1"
4
+ VERSION = "0.2.3"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: vore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.3
5
5
  platform: arm64-linux
6
6
  authors:
7
7
  - Garen J. Torikian