webstract 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/webstract.rb +4 -1
- data/lib/webstract/errors.rb +4 -0
- data/lib/webstract/screen_capture.rb +60 -0
- data/lib/webstract/screenshot.rb +3 -7
- data/lib/webstract/screenshot_backend.rb +64 -0
- data/lib/webstract/version.rb +1 -1
- metadata +63 -16
- data/.gitignore +0 -14
- data/Gemfile +0 -4
- data/Rakefile +0 -2
- data/webstract.gemspec +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6243396588bd377a63b7bde8eda4753f74580d2e
|
4
|
+
data.tar.gz: 1a3a31eb9db5c4e83b197798d00a8c049ca6acb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a1b8514db48474677b8a2c51e59a4ad2da3a61d4f482372d2cc467dedfc98fbb8ad34abaee5fdca140b45afcf9f32cceefb473932da24a25761571e462c2a7e
|
7
|
+
data.tar.gz: 463d81732822cb8dea73e1c8055d71bc9f9824d8dc335d78ed1ef8557a5f48e2149da8132e74acc373e787e5c3658e4f05231a4816364304ee33d4e96e3db166
|
data/lib/webstract.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
module Webstract
|
2
2
|
|
3
|
+
autoload :Errors, 'webstract/errors'
|
4
|
+
autoload :ScreenCapture, 'webstract/screen_capture'
|
5
|
+
autoload :ScreenshotBackend, 'webstract/screenshot_backend'
|
3
6
|
autoload :Screenshot, 'webstract/screenshot'
|
4
|
-
autoload :Favicon,
|
7
|
+
autoload :Favicon, 'webstract/favicon'
|
5
8
|
|
6
9
|
def self.screenshot(options = {})
|
7
10
|
Webstract::Screenshot.new(options)
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'capybara/dsl'
|
2
|
+
|
3
|
+
module Webstract
|
4
|
+
class ScreenCapture
|
5
|
+
include Capybara::DSL
|
6
|
+
|
7
|
+
attr_reader :width, :height, :user_agent, :accept_language, :path
|
8
|
+
|
9
|
+
def initialize(opts = {})
|
10
|
+
Webstract::ScreenshotBackend.capybara_setup!
|
11
|
+
@width = opts.fetch(:width, Webstract::ScreenshotBackend.width)
|
12
|
+
@height = opts.fetch(:height, Webstract::ScreenshotBackend.height)
|
13
|
+
@user_agent = opts.fetch(:user_agent, Webstract::ScreenshotBackend.user_agent)
|
14
|
+
@accept_language = opts.fetch(:accept_language, Webstract::ScreenshotBackend.accept_language)
|
15
|
+
|
16
|
+
# Browser settings
|
17
|
+
page.driver.resize(@width, @height)
|
18
|
+
page.driver.headers = {
|
19
|
+
"User-Agent" => @user_agent,
|
20
|
+
'Accept-Language' => @accept_language
|
21
|
+
}
|
22
|
+
end
|
23
|
+
|
24
|
+
def start_session(&block)
|
25
|
+
Capybara.reset_sessions!
|
26
|
+
Capybara.current_session.instance_eval(&block) if block_given?
|
27
|
+
@session_started = true
|
28
|
+
self
|
29
|
+
end
|
30
|
+
|
31
|
+
# Captures a screenshot of +url+ saving it to +path+.
#
# url  - address handed to Capybara's +visit+.
# path - file the driver writes the full-page screenshot to.
# opts - optional settings:
#        :width   - replaces the stored width when truthy
#        :height  - replaces the stored height when truthy
#        :timeout - number of seconds to sleep after the page is visited
#
# Raises Webstract::Error when the response status is neither 200 nor 3xx,
# and when one of the rescued Poltergeist / Errno::EPIPE errors occurs.
# NOTE(review): Webstract::Error is presumably defined in webstract/errors.rb,
# which is not visible here - confirm it is loaded before this method runs.
def capture(url, path, opts = {})
  # Each dimension is guarded by its own key. Previously the height was
  # guarded by opts[:width], so a height passed alone was ignored and a width
  # passed alone silently forced the height to 90.
  @width = opts[:width] if opts[:width]
  @height = opts[:height] if opts[:height]
  # NOTE(review): the new dimensions are only stored; the driver is resized in
  # the constructor, not here - confirm whether a resize is intended.

  # Reset the session before visiting, unless start_session already did so.
  Capybara.reset_sessions! unless @session_started
  @session_started = false

  # Open page
  visit(url)

  # Optional pause after the visit, before the status code is checked.
  sleep opts[:timeout] if opts[:timeout]

  # Check response code: only 200 and 3xx produce a screenshot.
  status = page.driver.status_code
  unless status.to_i == 200 || status.to_i / 100 == 3
    raise Webstract::Error, "Could not fetch page: #{url.inspect}, error code: #{status}"
  end

  page.driver.save_screenshot(path, :full => true)
rescue Capybara::Poltergeist::BrowserError, Capybara::Poltergeist::DeadClient, Capybara::Poltergeist::TimeoutError, Errno::EPIPE => e
  # TODO: Handle Errno::EPIPE and Errno::ECONNRESET
  raise Webstract::Error, "Capybara error: #{e.message.inspect}"
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/webstract/screenshot.rb
CHANGED
@@ -1,23 +1,19 @@
|
|
1
|
-
require 'webshot'
|
2
|
-
|
3
1
|
module Webstract
|
4
2
|
class Screenshot
|
5
|
-
attr_accessor :url, :path, :width, :height, :
|
3
|
+
attr_accessor :url, :path, :width, :height, :user_agent, :accept_language
|
6
4
|
attr_reader :handle
|
7
5
|
|
8
6
|
def initialize(options = {})
|
9
|
-
@handle =
|
7
|
+
@handle = Webstract::ScreenCapture.new(options)
|
10
8
|
|
11
9
|
options.each do |k, value|
|
12
10
|
setter = "#{k}="
|
13
11
|
self.public_send(setter, value) if self.respond_to?(setter)
|
14
12
|
end
|
15
|
-
|
16
|
-
@quality = 85 unless self.quality
|
17
13
|
end
|
18
14
|
|
19
15
|
def capture
|
20
|
-
handle.capture(url, path, width: width, height: height,
|
16
|
+
handle.capture(url, path, width: width, height: height, user_agent: user_agent, accept_language: accept_language)
|
21
17
|
end
|
22
18
|
|
23
19
|
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require "capybara/dsl"
|
2
|
+
require "capybara/poltergeist"
|
3
|
+
require "active_support"
|
4
|
+
require "active_support/core_ext"
|
5
|
+
|
6
|
+
module Webstract
|
7
|
+
module ScreenshotBackend
|
8
|
+
|
9
|
+
USER_AGENTS = {
|
10
|
+
web: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.43 Safari/537.31',
|
11
|
+
android: 'Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 4 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19',
|
12
|
+
ios: 'Mozilla/5.0 (iPhone; CPU iPhone OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3'
|
13
|
+
}
|
14
|
+
|
15
|
+
## Browser settings
|
16
|
+
# Width
|
17
|
+
mattr_accessor :width
|
18
|
+
@@width = 1024
|
19
|
+
|
20
|
+
# Height
|
21
|
+
mattr_accessor :height
|
22
|
+
@@height = 768
|
23
|
+
|
24
|
+
mattr_accessor :accept_language
|
25
|
+
@@accept_language = 'en-us,en;q=0.5'
|
26
|
+
|
27
|
+
|
28
|
+
# User agent
class << self
  # Returns the configured user-agent string, defaulting to (and memoizing)
  # the :web entry of USER_AGENTS.
  def user_agent
    # The constant is USER_AGENTS (plural); the singular name raised NameError.
    @user_agent ||= USER_AGENTS[:web]
  end

  # Selects the user-agent string stored in USER_AGENTS under the key +ua+.
  #
  # Raises ArgumentError when +ua+ is not a key of USER_AGENTS; the previously
  # configured value is left untouched in that case.
  def user_agent=(ua)
    agent_string = USER_AGENTS[ua]
    # Double quotes are required for interpolation; the single-quoted original
    # emitted the literal text "#{USER_AGENTS.inspect}".
    raise ArgumentError, "must be one of #{USER_AGENTS.keys.inspect}" unless agent_string

    @user_agent = agent_string
  end
end
|
41
|
+
|
42
|
+
# Customize settings
|
43
|
+
def self.setup
|
44
|
+
yield(self)
|
45
|
+
end
|
46
|
+
|
47
|
+
# Capybara setup
|
48
|
+
def self.capybara_setup!
|
49
|
+
# By default Capybara will try to boot a rack application
|
50
|
+
# automatically. You might want to switch off Capybara's
|
51
|
+
# rack server if you are running against a remote application
|
52
|
+
Capybara.run_server = false
|
53
|
+
Capybara.register_driver :poltergeist do |app|
|
54
|
+
Capybara::Poltergeist::Driver.new(app, {
|
55
|
+
# Raise JavaScript errors to Ruby
|
56
|
+
js_errors: false,
|
57
|
+
# Additional command line options for PhantomJS
|
58
|
+
phantomjs_options: ['--ignore-ssl-errors=yes'],
|
59
|
+
})
|
60
|
+
end
|
61
|
+
Capybara.current_driver = :poltergeist
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
data/lib/webstract/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,63 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: webstract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Faucett
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-10-
|
11
|
+
date: 2014-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: activesupport
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '4.1'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '5'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '4.1'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '5'
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: poltergeist
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '1.5'
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '1.5'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: faviconduit
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
13
61
|
- !ruby/object:Gem::Dependency
|
14
62
|
name: bundler
|
15
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -39,33 +87,33 @@ dependencies:
|
|
39
87
|
- !ruby/object:Gem::Version
|
40
88
|
version: '10.0'
|
41
89
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
90
|
+
name: rspec
|
43
91
|
requirement: !ruby/object:Gem::Requirement
|
44
92
|
requirements:
|
45
|
-
- - "
|
93
|
+
- - "~>"
|
46
94
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
48
|
-
type: :
|
95
|
+
version: '3'
|
96
|
+
type: :development
|
49
97
|
prerelease: false
|
50
98
|
version_requirements: !ruby/object:Gem::Requirement
|
51
99
|
requirements:
|
52
|
-
- - "
|
100
|
+
- - "~>"
|
53
101
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
102
|
+
version: '3'
|
55
103
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
104
|
+
name: pry
|
57
105
|
requirement: !ruby/object:Gem::Requirement
|
58
106
|
requirements:
|
59
107
|
- - ">="
|
60
108
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
62
|
-
type: :
|
109
|
+
version: 0.10.0
|
110
|
+
type: :development
|
63
111
|
prerelease: false
|
64
112
|
version_requirements: !ruby/object:Gem::Requirement
|
65
113
|
requirements:
|
66
114
|
- - ">="
|
67
115
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
116
|
+
version: 0.10.0
|
69
117
|
description: Extract images, favicons, and meta info from websites
|
70
118
|
email:
|
71
119
|
- jwaterfaucett@gmail.com
|
@@ -73,16 +121,15 @@ executables: []
|
|
73
121
|
extensions: []
|
74
122
|
extra_rdoc_files: []
|
75
123
|
files:
|
76
|
-
- ".gitignore"
|
77
|
-
- Gemfile
|
78
124
|
- LICENSE.txt
|
79
125
|
- README.md
|
80
|
-
- Rakefile
|
81
126
|
- lib/webstract.rb
|
127
|
+
- lib/webstract/errors.rb
|
82
128
|
- lib/webstract/favicon.rb
|
129
|
+
- lib/webstract/screen_capture.rb
|
83
130
|
- lib/webstract/screenshot.rb
|
131
|
+
- lib/webstract/screenshot_backend.rb
|
84
132
|
- lib/webstract/version.rb
|
85
|
-
- webstract.gemspec
|
86
133
|
homepage:
|
87
134
|
licenses:
|
88
135
|
- MIT
|
data/.gitignore
DELETED
data/Gemfile
DELETED
data/Rakefile
DELETED
data/webstract.gemspec
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
3
|
-
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require 'webstract/version'
|
5
|
-
|
6
|
-
Gem::Specification.new do |spec|
|
7
|
-
spec.name = "webstract"
|
8
|
-
spec.version = Webstract::VERSION
|
9
|
-
spec.authors = ["John Faucett"]
|
10
|
-
spec.email = ["jwaterfaucett@gmail.com"]
|
11
|
-
spec.summary = 'Extract information from websites'
|
12
|
-
spec.description = 'Extract images, favicons, and meta info from websites'
|
13
|
-
spec.license = 'MIT'
|
14
|
-
|
15
|
-
spec.files = `git ls-files -z`.split("\x0")
|
16
|
-
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
17
|
-
spec.require_paths = ['lib']
|
18
|
-
|
19
|
-
spec.add_development_dependency 'bundler', '~> 1.7'
|
20
|
-
spec.add_development_dependency 'rake', '~> 10.0'
|
21
|
-
|
22
|
-
spec.add_runtime_dependency 'webshot'
|
23
|
-
spec.add_runtime_dependency 'faviconduit'
|
24
|
-
end
|