snapcrawl 0.5.1 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 83077ce26e7825a14948d6048ff92ee5cc15a17e3eeaae5687d891b68d83cd92
4
- data.tar.gz: 15f264f683c08f9c0a2d379fa76e3dad1698a30808fb3694ccf334b45450c8ff
3
+ metadata.gz: 62a293da259afce5690315f27f2bbcd881e495a3d1b5344eb9ed9e2c46bd4a4d
4
+ data.tar.gz: d600fdbcd2344e5a19f853cbea67a0d8ad0c365a38d00aa4de8d02dd6e52e5b0
5
5
  SHA512:
6
- metadata.gz: 67d1a62d2c24dbf675c7e83449782ed88e7de62122082033239bc9ce436566dea5f1bad5b442784f1a476d05d75f20a52ec17c3a0fd6bd3a3a4ef7dcf7f76d07
7
- data.tar.gz: ab17a294bad674814dd1e47407f9da58ae9c81dd1becee0db9d57d944c41ea6727cad6123b03797bd238b061b3138d9c4c42b32bd7f384a04716aeb4d93ee670
6
+ metadata.gz: 3ebdb2355480bacd7f7a6faba264a31086e68c1864c692607fdb6fbc11df210eee17af936ab63305484ee46ac473d50b4033be11e995b51b9050b359c81dd906
7
+ data.tar.gz: 42a0a9f048fe9b5b1b04426d444710a256ccc8e9a914e3277f062c4ebf760d50a018c1f189e7b0cebced1c236f5d13ca56ab4abbf808a5ec4812bf9a754a9343
data/README.md CHANGED
@@ -27,7 +27,7 @@ You can run Snapcrawl by using this docker image (which contains all the
27
27
  necessary prerequisites):
28
28
 
29
29
  ```shell
30
- $ alias snapcrawl='docker run --rm -it --volume $PWD:/app dannyben/snapcrawl'
30
+ $ alias snapcrawl='docker run --rm -it --network host --volume "$PWD:/app" dannyben/snapcrawl'
31
31
  ```
32
32
 
33
33
  For more information on the Docker image, refer to the [docker-snapcrawl][3] repository.
@@ -16,10 +16,10 @@ module Snapcrawl
16
16
  # Config. The $logger is available, but it was not yet fully
17
17
  # configured with log_level etc.
18
18
  if File.exist? file
19
- # $logger.debug "loading config file %{green}#{file}%{reset}"
19
+ # $logger.debug "loading config file !txtgrn!#{file}"
20
20
  push file
21
21
  else
22
- # $logger.debug "creating config file %{green}#{file}%{reset}"
22
+ # $logger.debug "creating config file !txtgrn!#{file}"
23
23
  create_config file
24
24
  end
25
25
  end
@@ -7,7 +7,7 @@ module Snapcrawl
7
7
  attr_reader :url
8
8
 
9
9
  def initialize(url)
10
- $logger.debug "initializing crawler with %{green}#{url}%{reset}"
10
+ $logger.debug "initializing crawler with !txtgrn!#{url}"
11
11
 
12
12
  config_for_display = Config.settings.dup
13
13
  config_for_display['name_template'] = '%%{url}'
@@ -25,7 +25,7 @@ module Snapcrawl
25
25
  private
26
26
 
27
27
  def process_todo
28
- $logger.debug "processing queue: %{green}#{todo.count} remaining%{reset}"
28
+ $logger.debug "processing queue: !txtgrn!#{todo.count} remaining"
29
29
 
30
30
  url, page = todo.shift
31
31
  done.push url
@@ -40,12 +40,12 @@ module Snapcrawl
40
40
  next if todo.has_key?(sub_page) or done.include?(sub_page)
41
41
 
42
42
  if Config.url_whitelist and sub_page.path !~ /#{Config.url_whitelist}/
43
- $logger.debug "ignoring %{purple}%{underlined}#{sub_page.url}%{reset}, reason: whitelist"
43
+ $logger.debug "ignoring !undpur!#{sub_page.url}!txtrst!, reason: whitelist"
44
44
  next
45
45
  end
46
46
 
47
47
  if Config.url_blacklist and sub_page.path =~ /#{Config.url_blacklist}/
48
- $logger.debug "ignoring %{purple}%{underlined}#{sub_page.url}%{reset}, reason: blacklist"
48
+ $logger.debug "ignoring !undpur!#{sub_page.url}!txtrst!, reason: blacklist"
49
49
  next
50
50
  end
51
51
 
@@ -56,7 +56,7 @@ module Snapcrawl
56
56
  def process_page(page)
57
57
  outfile = "#{Config.snaps_dir}/#{Config.name_template}.png" % { url: page.url.to_slug }
58
58
 
59
- $logger.info "processing %{purple}%{underlined}#{page.url}%{reset}, depth: #{page.depth}"
59
+ $logger.info "processing !undpur!#{page.url}!txtrst!, depth: #{page.depth}"
60
60
 
61
61
  if !page.valid?
62
62
  $logger.debug "page #{page.path} is invalid, aborting process"
@@ -66,7 +66,7 @@ module Snapcrawl
66
66
  if file_fresh? outfile
67
67
  $logger.info "screenshot for #{page.path} already exists"
68
68
  else
69
- $logger.info "%{bold}capturing screenshot for #{page.path}%{reset}"
69
+ $logger.info "!bldgrn!capturing screenshot for #{page.path}"
70
70
  save_screenshot page, outfile
71
71
  end
72
72
 
@@ -76,7 +76,7 @@ module Snapcrawl
76
76
  def save_screenshot(page, outfile)
77
77
  page.save_screenshot outfile
78
78
  rescue => e
79
- $logger.error "screenshot error on %{purple}%{underlined}#{page.path}%{reset} - %{red}#{e.class}%{reset}: #{e.message}"
79
+ $logger.error "screenshot error on !undpur!#{page.path}!txtrst! - !txtred!#{e.class}!txtrst!: #{e.message}"
80
80
  end
81
81
 
82
82
  def file_fresh?(file)
@@ -8,10 +8,10 @@ module Snapcrawl
8
8
  def verify
9
9
  return if @verified
10
10
 
11
- $logger.debug 'verifying %{green}phantomjs%{reset} is present'
11
+ $logger.debug 'verifying !txtgrn!phantomjs!txtrst! is present'
12
12
  raise MissingPhantomJS unless command_exist? "phantomjs"
13
13
 
14
- $logger.debug 'verifying %{green}imagemagick%{reset} is present'
14
+ $logger.debug 'verifying !txtgrn!imagemagick!txtrst! is present'
15
15
  raise MissingImageMagick unless command_exist? "convert"
16
16
 
17
17
  @verified = true
@@ -1,57 +1,35 @@
1
+ require 'colsole'
2
+
1
3
  module Snapcrawl
2
4
  module LogHelpers
5
+ include Colsole
6
+
3
7
  SEVERITY_COLORS = {
4
- 'INFO' => :blue,
5
- 'WARN' => :yellow,
6
- 'ERROR' => :red,
7
- 'FATAL' => :red,
8
- 'DEBUG' => :cyan
8
+ 'INFO' => :txtblu,
9
+ 'WARN' => :txtylw,
10
+ 'ERROR' => :txtred,
11
+ 'FATAL' => :txtred,
12
+ 'DEBUG' => :txtcyn
9
13
  }
10
14
 
11
15
  def log_formatter
12
16
  proc do |severity, _time, _prog, message|
13
17
  severity_color = SEVERITY_COLORS[severity]
14
-
15
- "%{#{severity_color}}#{severity.rjust 5}%{reset} : #{message}\n" % log_colors
18
+ line = "!#{severity_color}!#{severity.rjust 5}!txtrst! : #{message}\n"
19
+ use_colors? ? colorize(line) : strip_color_markers(line)
16
20
  end
17
21
  end
18
22
 
19
- def log_colors
20
- @log_colors ||= log_colors!
21
- end
22
-
23
- def log_colors!
24
- colors? ? actual_colors : empty_colors
25
- end
26
-
27
- def actual_colors
28
- {
29
- red: "\e[31m", green: "\e[32m", yellow: "\e[33m",
30
- blue: "\e[34m", purple: "\e[35m", cyan: "\e[36m",
31
- underlined: "\e[4m", bold: "\e[1m",
32
- none: "", reset: "\e[0m"
33
- }
34
- end
35
-
36
- def empty_colors
37
- {
38
- red: "", green: "", yellow: "",
39
- blue: "", purple: "", cyan: "",
40
- underlined: "", bold: "",
41
- none: "", reset: ""
42
- }
43
- end
44
-
45
- def colors?
46
- if Config.log_color == 'auto'
47
- tty?
48
- else
49
- Config.log_color
50
- end
23
+ def use_colors?
24
+ @use_colors ||= (Config.log_color == 'auto' ? tty? : Config.log_color)
51
25
  end
52
26
 
53
27
  def tty?
54
28
  ENV['TTY'] == 'on' ? true : ENV['TTY'] == 'off' ? false : $stdout.tty?
55
29
  end
30
+
31
+ def strip_color_markers(text)
32
+ text.gsub(/\!([a-z]{6})\!/, '')
33
+ end
56
34
  end
57
35
  end
@@ -55,13 +55,13 @@ module Snapcrawl
55
55
  response = cache.get(url) { HTTParty.get url }
56
56
 
57
57
  if !response.success?
58
- $logger.warn "http error on %{purple}%{underlined}#{url}%{reset}, code: %{yellow}#{response.code}%{reset}, message: #{response.message.strip}"
58
+ $logger.warn "http error on !undpur!#{url}!txtrst!, code: !txtylw!#{response.code}!txtrst!, message: #{response.message.strip}"
59
59
  end
60
60
 
61
61
  response
62
62
 
63
63
  rescue => e
64
- $logger.error "http error on %{purple}%{underlined}#{url}%{reset} - %{red}#{e.class}%{reset}: #{e.message}"
64
+ $logger.error "http error on !undpur!#{url}!txtrst! - !txtred!#{e.class}!txtrst!: #{e.message}"
65
65
  nil
66
66
 
67
67
  end
@@ -95,7 +95,7 @@ module Snapcrawl
95
95
  begin
96
96
  link = Addressable::URI.join(url, link).to_s.dup
97
97
  rescue => e
98
- $logger.warn "%{red}#{e.class}%{reset}: #{e.message} on #{path} (link: #{link})"
98
+ $logger.warn "!txtred!#{e.class}!txtrst!: #{e.message} on #{path} (link: #{link})"
99
99
  return nil
100
100
  end
101
101
 
@@ -5,7 +5,7 @@ module Snapcrawl
5
5
  extend LogHelpers
6
6
 
7
7
  def self.new
8
- Logger.new(STDOUT, formatter: log_formatter, level: Config.log_level)
8
+ Logger.new($stdout, formatter: log_formatter, level: Config.log_level)
9
9
  end
10
10
  end
11
11
  end
@@ -1,3 +1,3 @@
1
1
  module Snapcrawl
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: snapcrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Danny Ben Shitrit
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-14 00:00:00.000000000 Z
11
+ date: 2021-02-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: colsole
@@ -166,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
166
166
  - !ruby/object:Gem::Version
167
167
  version: '0'
168
168
  requirements: []
169
- rubygems_version: 3.0.3
169
+ rubygems_version: 3.2.3
170
170
  signing_key:
171
171
  specification_version: 4
172
172
  summary: Crawl a website and take screenshots (CLI + Library)