website_screenshot 1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.rdoc +55 -0
- data/Rakefile +27 -0
- data/bin/website-screenshot +41 -0
- data/lib/website_screenshot.rb +150 -0
- data/website_screenshot.gemspec +20 -0
- metadata +78 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Daniel Mircea, OkapiStudio
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
= Website Screenshot
|
2
|
+
|
3
|
+
Opens a specified url in a new WebKit window using the Qt framework and saves a screenshot once the page has finished loading.
|
4
|
+
|
5
|
+
|
6
|
+
== Usage
|
7
|
+
|
8
|
+
From ruby:
|
9
|
+
|
10
|
+
ws = WebsiteScreenshot.new :url => "http://google.com"
|
11
|
+
ws.get
|
12
|
+
|
13
|
+
Check the docs for additional options.
|
14
|
+
|
15
|
+
Command line:
|
16
|
+
|
17
|
+
$ website-screenshot -u http://google.com
|
18
|
+
|
19
|
+
You'll probably want to use this library on a webserver. You can easily do so without installing the full X stack by launching it using xvfb:
|
20
|
+
|
21
|
+
$ xvfb-run --server-args="-screen 0, 1400x900x24" website-screenshot --size=1400x900 --url=http://viseztrance.com --file=vise.png
|
22
|
+
|
23
|
+
|
24
|
+
== Flash and other plugins
|
25
|
+
|
26
|
+
If flash or any other plugin is installed it will be used while rendering the page. Locations can be set with environment variables such as $QTWEBKIT_PLUGIN_PATH.
|
27
|
+
|
28
|
+
|
29
|
+
== Limitations
|
30
|
+
|
31
|
+
Webkit segfaults on rare occasions, killing the entire ruby process with it - that's why I encourage calling the script from the command line.
|
32
|
+
|
33
|
+
The basic qt webkit browser doesn't handle multiple redirects very well; for this reason the final _url_ is first resolved using _curl_. Feel free to fork this and get around the limitation.
|
34
|
+
|
35
|
+
|
36
|
+
== Source code
|
37
|
+
|
38
|
+
The source code is hosted on Github: http://github.com/viseztrance/website_screenshot
|
39
|
+
|
40
|
+
To get WebsiteScreenshot from source:
|
41
|
+
|
42
|
+
git clone git@github.com:viseztrance/website_screenshot.git
|
43
|
+
|
44
|
+
|
45
|
+
== Acknowledgements and implementations
|
46
|
+
|
47
|
+
This code is the heart of a pet project I wrote in my spare time named {mywebsit.es}[http://mywebsit.es].
|
48
|
+
|
49
|
+
I also wrote a {web service}[http://github.com/viseztrance/website_screenshot_service] which I hope is a good starting point to anyone who implements this code.
|
50
|
+
|
51
|
+
|
52
|
+
== License
|
53
|
+
|
54
|
+
This package is licensed under the MIT license and/or the Creative
|
55
|
+
Commons Attribution-ShareAlike.
|
data/Rakefile
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Rake tasks for generating the documentation and releasing the gem.

# 'rake/rdoctask' (Rake::RDocTask) was deprecated in Rake 0.9 and is no longer
# shipped by later Rake releases; the task now lives in RDoc itself. Prefer the
# RDoc-provided task and only fall back to the legacy one on old toolchains.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

# The gemspec is the single source for the gem name, version and rdoc options.
spec = Gem::Specification.load(File.expand_path("website_screenshot.gemspec", File.dirname(__FILE__)))

# Create the documentation.
rdoc_task_class.new do |rdoc|
  rdoc.rdoc_files.include "README.rdoc", "lib/**/*.rb"
  rdoc.options = spec.rdoc_options
end

desc "Push new release to rubyforge and git tag"
task :push do
  sh "git push"
  puts "Tagging version #{spec.version} .."
  sh "git tag v#{spec.version}"
  sh "git push --tag"
  puts "Building and pushing gem .."
  sh "gem build #{spec.name}.gemspec"
  sh "gem push #{spec.name}-#{spec.version}.gem"
end

desc "Install #{spec.name} locally"
task :install do
  sh "gem build #{spec.name}.gemspec"
  sh "gem install #{spec.name}-#{spec.version}.gem"
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "optparse"
|
4
|
+
require "website_screenshot"
|
5
|
+
|
6
|
+
# Options collected from the command line; handed unchanged to
# WebsiteScreenshot.new once parsing succeeds.
options = {}

ARGV.options do |opt|
  script_name = File.basename($0)

  opt.set_summary_indent(" ")
  opt.banner = "Usage: #{script_name} [options]"
  opt.define_head "Takes screenshots of webpages."

  opt.separator ""

  # Every handler stores its value explicitly. Assigning through the block
  # parameter (`{ |options[:url]| }`) is Ruby 1.8-only syntax and is a
  # SyntaxError on Ruby 1.9 and later.
  opt.on("-u", "--url=val", String,
         "Website url, eg. http://google.com") { |value| options[:url] = value }
  opt.on("-s", "--size=val", String,
         "Window size, eg. 1024x768") { |value| options[:size] = value }
  opt.on("-f", "--file=val", String,
         "Image file name for the saved output. Can also be a path.") { |value| options[:file_name] = value }
  opt.on("-v", "--verbose=val", String,
         "Display progress during loading.") { |value| options[:verbose] = value }
  opt.on("-t", "--timeout=val", Integer,
         "Timeout before killing the page.") { |value| options[:render_timeout] = value }

  opt.separator ""

  opt.on_tail("-h", "--help", "Show this help message.") { puts opt; exit }

  opt.parse!

  if options[:url].nil?
    puts "The url option is required (--url). -h for additional information."
  else
    # Prints whatever WebsiteScreenshot#get returns.
    screenshot = WebsiteScreenshot.new options
    puts screenshot.get
  end

end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
#
|
2
|
+
# = website_screenshot.rb - Takes screenshots of webpages
|
3
|
+
#
|
4
|
+
# Author:: Daniel Mircea daniel@viseztrance.com
|
5
|
+
# Copyright:: Copyright (c) 2011 Daniel Mircea
|
6
|
+
# License:: MIT and/or Creative Commons Attribution-ShareAlike
|
7
|
+
|
8
|
+
require "Qt4"
|
9
|
+
require "qtwebkit"
|
10
|
+
|
11
|
+
class WebsiteScreenshot

  VERSION = Gem::Specification.load(File.expand_path("../website_screenshot.gemspec", File.dirname(__FILE__))).version.to_s

  # Load status written by the Qt signal handlers and read by the sentinel
  # thread in #get. These are class variables, so every instance shares them;
  # #get resets them at the start of each run.
  @@state = "waiting"   # "waiting", "finished-success" or "finished-fail"
  @@progress = 0        # last reported load percentage, or false when loading failed
  @@started_at = nil    # epoch seconds from which the render timeout is measured

  # Timeout before killing the page.
  attr_accessor :render_timeout

  attr_accessor :check_loading_status_interval #:nodoc:

  # Filename to save the file to.
  attr_accessor :file_name

  # Window size. The writer is defined below.
  attr_reader :size

  # Page url. The writer is defined below.
  attr_reader :url

  attr_accessor :verbose #:nodoc:

  # Instantiates a new object.
  # ==== Options:
  # [*url*] Website path. URL redirects are automatically resolved using +curl+.
  # [*file_name*] Name of the saved image. Defaults to output.png.
  # [*render_timeout*] Timeout before killing the page. Defaults at two minutes.
  # [*check_loading_status_interval*] Interval between page status checks.
  # [*size*] Window size the page is being rendered into. Defaults at 1360x768.
  # [*verbose*] Outputs page load progress.
  def initialize(args)
    self.render_timeout = args[:render_timeout] || 120
    self.check_loading_status_interval = args[:check_loading_status_interval] || 0.1
    self.file_name = args[:file_name] || "output.png"
    self.size = args[:size] || "1360x768"
    self.url = args[:url]
    self.verbose = args[:verbose] # nil (off) unless given
  end

  # Renders the website and saves a screenshot.
  # ==== Returns:
  # * If the webpage began rendering, the load percentage. A screenshot is saved if the page has been +50%+ loaded or more.
  # * +false+ if for some reason the +url+ could not be opened, or the browser initialized.
  def get
    # The status lives in class variables, so it must be reset here; otherwise
    # a second call would immediately see the previous run's "finished" state.
    @@state = "waiting"
    @@progress = 0
    # Start the timeout clock right away so the sentinel thread never reads a
    # nil start time if it polls before loadStarted() has fired.
    @@started_at = Time.now.to_i

    app = Qt::Application.new(ARGV)
    webview = Qt::WebView.new

    webview.connect(SIGNAL("loadStarted()")) do
      @@started_at = Time.now.to_i
    end

    webview.connect(SIGNAL("loadFinished(bool)")) do |result|
      if result
        @@state = "finished-success"
      else
        @@state = "finished-fail"
        @@progress = false
      end
      suspend_thread # Give it enough time to switch to the sentinel thread and avoid an empty exec loop.
    end

    webview.connect(SIGNAL("loadProgress(int)")) do |progress|
      puts "#{progress}%" if verbose
      @@progress = progress
      suspend_thread if has_reached_time_out?
    end

    # Enable flash, javascript and some other sensible browsing options.
    settings = webview.settings
    settings.setAttribute(Qt::WebSettings::PluginsEnabled, true)
    settings.setAttribute(Qt::WebSettings::JavascriptCanOpenWindows, false)
    settings.setAttribute(Qt::WebSettings::PrivateBrowsingEnabled, true)
    settings.setAttribute(Qt::WebSettings::JavascriptEnabled, true)

    # Hide the scrollbars.
    webview.page.mainFrame.setScrollBarPolicy(Qt::Horizontal, Qt::ScrollBarAlwaysOff)
    webview.page.mainFrame.setScrollBarPolicy(Qt::Vertical, Qt::ScrollBarAlwaysOff)

    webview.load(Qt::Url.new(url))
    webview.resize(size)
    webview.show

    # Runs the Qt event loop; killed by the sentinel thread below.
    render_page_thread = Thread.new do
      app.exec
    end

    # Sentinel: polls the load status and ends the run once the page has
    # finished loading or the render timeout has elapsed.
    check_status_thread = Thread.new do
      loop do
        sleep check_loading_status_interval
        next unless @@state.start_with?("finished") || has_reached_time_out?

        # Save a screenshot if the page finished loading or it has timed out with 50%+ completion.
        # @@progress is false after a failed load, so it is checked before being compared.
        save(webview) if @@state == "finished-success" || (@@progress && @@progress >= 50)
        render_page_thread.kill
        break
      end
    end

    check_status_thread.join
    render_page_thread.join

    @@progress
  end

  # Sets the window size.
  #
  # The geometry must have the following format: _widthxheight_.
  # The value is stored as a Qt::Size object built from the part before the
  # first "x" (width) and the part after the last "x" (height).
  def size=(geometry)
    geometry_information = geometry.split("x")
    @size = Qt::Size.new(geometry_information.first.to_i, geometry_information.last.to_i)
  end

  # Sets the website url.
  #
  # Calls the operating systems +curl+ command to follow redirects and stores
  # the effective url it reports.
  #
  # curl is started with an argument list rather than through a shell, and the
  # url is passed via +--url+, so a caller-supplied value can neither inject
  # shell commands nor be mistaken for a curl option.
  def url=(path)
    curl_command = ["curl", "--url", path.to_s,
                    "-A", "Mozilla/5.0 (QtWebkit; WebsiteScreenshot)",
                    "-L", "-o", "/dev/null", "-w", "%{url_effective}"]
    @url = IO.popen(curl_command) { |curl| curl.read }
  end

  private

  # True once +render_timeout+ seconds have passed since the timeout clock was
  # started. Returns false while the clock has not been started at all.
  def has_reached_time_out?
    return false unless @@started_at
    Time.now.to_i >= (@@started_at + render_timeout)
  end

  # Blocks the calling thread for 30 seconds.
  def suspend_thread
    sleep(30)
  end

  # Grabs the webview's window and writes it to +file_name+, using the file
  # extension (without the dot) as the image format.
  def save(webview)
    sleep(5) # Wait a few seconds to allow some/any of the animations to take place
    pixmap = Qt::Pixmap.grabWindow(webview.window.winId)
    pixmap.save(file_name, File.extname(file_name).tr(".", ""))
  end

end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Gem specification for website_screenshot. The file's last expression is the
# specification itself.
spec = Gem::Specification.new do |s|
  s.name = "website_screenshot"
  s.version = "1.0"
  s.summary = "Takes screenshots of webpages"
  s.description = <<-EOF
Creates a new webkit window using the QT framework of a specified url and saves a screenshot when the page has finished loading.
  EOF

  s.authors << "Daniel Mircea"
  s.email = "daniel@viseztrance.com"
  s.homepage = "http://github.com/viseztrance/website_screenshot"

  s.files = Dir["{bin,lib,docs}/**/*"] + ["README.rdoc", "LICENSE", "Rakefile", "website_screenshot.gemspec"]
  s.executables = "website-screenshot"

  # NOTE(review): has_rdoc= is deprecated and appears to be absent from recent
  # RubyGems releases - confirm; it is only set where the setter still exists.
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)

  # The trailing << carries the chain onto the next line. Without it the
  # "--webcvs" pair was a separate, discarded expression and never reached
  # rdoc_options.
  s.rdoc_options << "--main" << "README.rdoc" << "--title" << "Website Screenshot" << "--line-numbers" <<
                    "--webcvs" << "http://github.com/viseztrance/website_screenshot"
  s.extra_rdoc_files = ["README.rdoc", "LICENSE"]
end
|
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: website_screenshot
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 15
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
version: "1.0"
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Daniel Mircea
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-02-27 00:00:00 +02:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: |
|
22
|
+
Creates a new webkit window using the QT framework of a specified url and saves a screenshot when the page has finished loading.
|
23
|
+
|
24
|
+
email: daniel@viseztrance.com
|
25
|
+
executables:
|
26
|
+
- website-screenshot
|
27
|
+
extensions: []
|
28
|
+
|
29
|
+
extra_rdoc_files:
|
30
|
+
- README.rdoc
|
31
|
+
- LICENSE
|
32
|
+
files:
|
33
|
+
- bin/website-screenshot
|
34
|
+
- lib/website_screenshot.rb
|
35
|
+
- README.rdoc
|
36
|
+
- LICENSE
|
37
|
+
- Rakefile
|
38
|
+
- website_screenshot.gemspec
|
39
|
+
has_rdoc: true
|
40
|
+
homepage: http://github.com/viseztrance/website_screenshot
|
41
|
+
licenses: []
|
42
|
+
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options:
|
45
|
+
- --main
|
46
|
+
- README.rdoc
|
47
|
+
- --title
|
48
|
+
- Website Screenshot
|
49
|
+
- --line-numbers
|
50
|
+
require_paths:
|
51
|
+
- lib
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
hash: 3
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
70
|
+
requirements: []
|
71
|
+
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.3.7
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: Takes screenshots of webpages
|
77
|
+
test_files: []
|
78
|
+
|