snapcrawl 0.5.1 → 0.5.2

This diff shows the changes between publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 83077ce26e7825a14948d6048ff92ee5cc15a17e3eeaae5687d891b68d83cd92
- data.tar.gz: 15f264f683c08f9c0a2d379fa76e3dad1698a30808fb3694ccf334b45450c8ff
+ metadata.gz: 62a293da259afce5690315f27f2bbcd881e495a3d1b5344eb9ed9e2c46bd4a4d
+ data.tar.gz: d600fdbcd2344e5a19f853cbea67a0d8ad0c365a38d00aa4de8d02dd6e52e5b0
  SHA512:
- metadata.gz: 67d1a62d2c24dbf675c7e83449782ed88e7de62122082033239bc9ce436566dea5f1bad5b442784f1a476d05d75f20a52ec17c3a0fd6bd3a3a4ef7dcf7f76d07
- data.tar.gz: ab17a294bad674814dd1e47407f9da58ae9c81dd1becee0db9d57d944c41ea6727cad6123b03797bd238b061b3138d9c4c42b32bd7f384a04716aeb4d93ee670
+ metadata.gz: 3ebdb2355480bacd7f7a6faba264a31086e68c1864c692607fdb6fbc11df210eee17af936ab63305484ee46ac473d50b4033be11e995b51b9050b359c81dd906
+ data.tar.gz: 42a0a9f048fe9b5b1b04426d444710a256ccc8e9a914e3277f062c4ebf760d50a018c1f189e7b0cebced1c236f5d13ca56ab4abbf808a5ec4812bf9a754a9343
data/README.md CHANGED
@@ -27,7 +27,7 @@ You can run Snapcrawl by using this docker image (which contains all the
  necessary prerequisites):
 
  ```shell
- $ alias snapcrawl='docker run --rm -it --volume $PWD:/app dannyben/snapcrawl'
+ $ alias snapcrawl='docker run --rm -it --network host --volume "$PWD:/app" dannyben/snapcrawl'
  ```
 
  For more information on the Docker image, refer to the [docker-snapcrawl][3] repository.
@@ -16,10 +16,10 @@ module Snapcrawl
  # Config. The $logger is available, but it was not yet fully
  # configured with log_level etc.
  if File.exist? file
- # $logger.debug "loading config file %{green}#{file}%{reset}"
+ # $logger.debug "loading config file !txtgrn!#{file}"
  push file
  else
- # $logger.debug "creating config file %{green}#{file}%{reset}"
+ # $logger.debug "creating config file !txtgrn!#{file}"
  create_config file
  end
  end
@@ -7,7 +7,7 @@ module Snapcrawl
  attr_reader :url
 
  def initialize(url)
- $logger.debug "initializing crawler with %{green}#{url}%{reset}"
+ $logger.debug "initializing crawler with !txtgrn!#{url}"
 
  config_for_display = Config.settings.dup
  config_for_display['name_template'] = '%%{url}'
@@ -25,7 +25,7 @@ module Snapcrawl
  private
 
  def process_todo
- $logger.debug "processing queue: %{green}#{todo.count} remaining%{reset}"
+ $logger.debug "processing queue: !txtgrn!#{todo.count} remaining"
 
  url, page = todo.shift
  done.push url
@@ -40,12 +40,12 @@ module Snapcrawl
  next if todo.has_key?(sub_page) or done.include?(sub_page)
 
  if Config.url_whitelist and sub_page.path !~ /#{Config.url_whitelist}/
- $logger.debug "ignoring %{purple}%{underlined}#{sub_page.url}%{reset}, reason: whitelist"
+ $logger.debug "ignoring !undpur!#{sub_page.url}!txtrst!, reason: whitelist"
  next
  end
 
  if Config.url_blacklist and sub_page.path =~ /#{Config.url_blacklist}/
- $logger.debug "ignoring %{purple}%{underlined}#{sub_page.url}%{reset}, reason: blacklist"
+ $logger.debug "ignoring !undpur!#{sub_page.url}!txtrst!, reason: blacklist"
  next
  end
 
@@ -56,7 +56,7 @@ module Snapcrawl
  def process_page(page)
  outfile = "#{Config.snaps_dir}/#{Config.name_template}.png" % { url: page.url.to_slug }
 
- $logger.info "processing %{purple}%{underlined}#{page.url}%{reset}, depth: #{page.depth}"
+ $logger.info "processing !undpur!#{page.url}!txtrst!, depth: #{page.depth}"
 
  if !page.valid?
  $logger.debug "page #{page.path} is invalid, aborting process"
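The outfile line in the hunk above uses Ruby's format operator with named references: the configured name_template contains a `%{url}` placeholder that is filled from the hash on the right (which is also why the crawler escapes it as `%%{url}` when printing the settings). A tiny illustration with made-up values:

```ruby
# Hypothetical template and slug, for illustration only.
template = 'snaps/%{url}.png'
template % { url: 'example-com-about' }   # => "snaps/example-com-about.png"
```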
@@ -66,7 +66,7 @@ module Snapcrawl
  if file_fresh? outfile
  $logger.info "screenshot for #{page.path} already exists"
  else
- $logger.info "%{bold}capturing screenshot for #{page.path}%{reset}"
+ $logger.info "!bldgrn!capturing screenshot for #{page.path}"
  save_screenshot page, outfile
  end
 
@@ -76,7 +76,7 @@ module Snapcrawl
  def save_screenshot(page, outfile)
  page.save_screenshot outfile
  rescue => e
- $logger.error "screenshot error on %{purple}%{underlined}#{page.path}%{reset} - %{red}#{e.class}%{reset}: #{e.message}"
+ $logger.error "screenshot error on !undpur!#{page.path}!txtrst! - !txtred!#{e.class}!txtrst!: #{e.message}"
  end
 
  def file_fresh?(file)
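For context on the crawler hunks above: the whitelist and blacklist settings are interpolated into regular expressions and matched against each sub-page path, and the skipped pages are what the new `!undpur!`-highlighted debug lines report. A rough, standalone sketch of that filtering decision (the `crawl?` helper and the sample values are hypothetical):

```ruby
# Hypothetical, simplified version of the whitelist/blacklist checks above.
def crawl?(path, whitelist: nil, blacklist: nil)
  return false if whitelist && path !~ /#{whitelist}/  # not whitelisted: skip
  return false if blacklist && path =~ /#{blacklist}/  # blacklisted: skip
  true
end

crawl? '/blog/post-1', whitelist: '^/blog'  # => true
crawl? '/admin/login', blacklist: 'admin'   # => false
```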
@@ -8,10 +8,10 @@ module Snapcrawl
  def verify
  return if @verified
 
- $logger.debug 'verifying %{green}phantomjs%{reset} is present'
+ $logger.debug 'verifying !txtgrn!phantomjs!txtrst! is present'
  raise MissingPhantomJS unless command_exist? "phantomjs"
 
- $logger.debug 'verifying %{green}imagemagick%{reset} is present'
+ $logger.debug 'verifying !txtgrn!imagemagick!txtrst! is present'
  raise MissingImageMagick unless command_exist? "convert"
 
  @verified = true
@@ -1,57 +1,35 @@
+ require 'colsole'
+
  module Snapcrawl
  module LogHelpers
+ include Colsole
+
  SEVERITY_COLORS = {
- 'INFO' => :blue,
- 'WARN' => :yellow,
- 'ERROR' => :red,
- 'FATAL' => :red,
- 'DEBUG' => :cyan
+ 'INFO' => :txtblu,
+ 'WARN' => :txtylw,
+ 'ERROR' => :txtred,
+ 'FATAL' => :txtred,
+ 'DEBUG' => :txtcyn
  }
 
  def log_formatter
  proc do |severity, _time, _prog, message|
  severity_color = SEVERITY_COLORS[severity]
-
- "%{#{severity_color}}#{severity.rjust 5}%{reset} : #{message}\n" % log_colors
+ line = "!#{severity_color}!#{severity.rjust 5}!txtrst! : #{message}\n"
+ use_colors? ? colorize(line) : strip_color_markers(line)
  end
  end
 
- def log_colors
- @log_colors ||= log_colors!
- end
-
- def log_colors!
- colors? ? actual_colors : empty_colors
- end
-
- def actual_colors
- {
- red: "\e[31m", green: "\e[32m", yellow: "\e[33m",
- blue: "\e[34m", purple: "\e[35m", cyan: "\e[36m",
- underlined: "\e[4m", bold: "\e[1m",
- none: "", reset: "\e[0m"
- }
- end
-
- def empty_colors
- {
- red: "", green: "", yellow: "",
- blue: "", purple: "", cyan: "",
- underlined: "", bold: "",
- none: "", reset: ""
- }
- end
-
- def colors?
- if Config.log_color == 'auto'
- tty?
- else
- Config.log_color
- end
+ def use_colors?
+ @use_colors ||= (Config.log_color == 'auto' ? tty? : Config.log_color)
  end
 
  def tty?
  ENV['TTY'] == 'on' ? true : ENV['TTY'] == 'off' ? false : $stdout.tty?
  end
+
+ def strip_color_markers(text)
+ text.gsub(/\!([a-z]{6})\!/, '')
+ end
  end
  end
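The log_helpers hunk above replaces the hand-rolled `%{color}` token table with Colsole-style markers: the formatter now builds one line tagged with `!txtblu!`/`!txtrst!`-style markers and either colorizes it or strips the markers, depending on `use_colors?`. A minimal standalone sketch of that behavior (it assumes the colsole gem the new code requires; the `MarkerDemo` class and `emit` method are hypothetical):

```ruby
require 'colsole'

# Hypothetical demo of the marker-based formatting introduced above.
class MarkerDemo
  include Colsole

  SEVERITY_COLORS = { 'INFO' => :txtblu, 'ERROR' => :txtred }

  # Build a marker-tagged line, then either convert the markers to ANSI
  # escape codes (Colsole's colorize) or strip them, mirroring use_colors?.
  def emit(severity, message, colors: $stdout.tty?)
    line = "!#{SEVERITY_COLORS[severity]}!#{severity.rjust 5}!txtrst! : #{message}\n"
    colors ? colorize(line) : line.gsub(/!([a-z]{6})!/, '')
  end
end

demo = MarkerDemo.new
print demo.emit('INFO', 'processing queue: !txtgrn!3 remaining')
print demo.emit('ERROR', 'screenshot error on !undpur!/about!txtrst!', colors: false)
```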
@@ -55,13 +55,13 @@ module Snapcrawl
  response = cache.get(url) { HTTParty.get url }
 
  if !response.success?
- $logger.warn "http error on %{purple}%{underlined}#{url}%{reset}, code: %{yellow}#{response.code}%{reset}, message: #{response.message.strip}"
+ $logger.warn "http error on !undpur!#{url}!txtrst!, code: !txtylw!#{response.code}!txtrst!, message: #{response.message.strip}"
  end
 
  response
 
  rescue => e
- $logger.error "http error on %{purple}%{underlined}#{url}%{reset} - %{red}#{e.class}%{reset}: #{e.message}"
+ $logger.error "http error on !undpur!#{url}!txtrst! - !txtred!#{e.class}!txtrst!: #{e.message}"
  nil
 
  end
@@ -95,7 +95,7 @@ module Snapcrawl
  begin
  link = Addressable::URI.join(url, link).to_s.dup
  rescue => e
- $logger.warn "%{red}#{e.class}%{reset}: #{e.message} on #{path} (link: #{link})"
+ $logger.warn "!txtred!#{e.class}!txtrst!: #{e.message} on #{path} (link: #{link})"
  return nil
  end
 
@@ -5,7 +5,7 @@ module Snapcrawl
  extend LogHelpers
 
  def self.new
- Logger.new(STDOUT, formatter: log_formatter, level: Config.log_level)
+ Logger.new($stdout, formatter: log_formatter, level: Config.log_level)
  end
  end
  end
@@ -1,3 +1,3 @@
  module Snapcrawl
- VERSION = "0.5.1"
+ VERSION = "0.5.2"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: snapcrawl
  version: !ruby/object:Gem::Version
- version: 0.5.1
+ version: 0.5.2
  platform: ruby
  authors:
  - Danny Ben Shitrit
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2020-03-14 00:00:00.000000000 Z
+ date: 2021-02-25 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: colsole
@@ -166,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.0.3
+ rubygems_version: 3.2.3
  signing_key:
  specification_version: 4
  summary: Crawl a website and take screenshots (CLI + Library)