website_screenshot 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.rdoc +55 -0
- data/Rakefile +27 -0
- data/bin/website-screenshot +41 -0
- data/lib/website_screenshot.rb +150 -0
- data/website_screenshot.gemspec +20 -0
- metadata +78 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Daniel Mircea, OkapiStudio
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
= Website Screenshot
|
2
|
+
|
3
|
+
Opens the specified URL in a new WebKit window (using the Qt framework) and saves a screenshot once the page has finished loading.
|
4
|
+
|
5
|
+
|
6
|
+
== Usage
|
7
|
+
|
8
|
+
From ruby:
|
9
|
+
|
10
|
+
ws = WebsiteScreenshot.new :url => "http://google.com"
|
11
|
+
ws.get
|
12
|
+
|
13
|
+
Check the docs for additional options.
|
14
|
+
|
15
|
+
Command line:
|
16
|
+
|
17
|
+
$ website-screenshot -u http://google.com
|
18
|
+
|
19
|
+
You'll probably want to use this library on a webserver. You can easily do so without installing the full X stack by launching it using xvfb:
|
20
|
+
|
21
|
+
$ xvfb-run --server-args="-screen 0, 1400x900x24" website-screenshot --size=1400x900 --url=http://viseztrance.com --file=vise.png
|
22
|
+
|
23
|
+
|
24
|
+
== Flash and other plugins
|
25
|
+
|
26
|
+
If flash or any other plugin is installed it will be used while rendering the page. Locations can be set with environment variables such as $QTWEBKIT_PLUGIN_PATH.
|
27
|
+
|
28
|
+
|
29
|
+
== Limitations
|
30
|
+
|
31
|
+
Webkit segfaults on rare occasions killing the entire ruby process with it - that's why I encourage calling the script from the command line.
|
32
|
+
|
33
|
+
The basic Qt WebKit browser doesn't handle multiple redirects very well; for this reason the _url_ is initially resolved using _curl_. Feel free to fork this and get around the limitation.
|
34
|
+
|
35
|
+
|
36
|
+
== Source code
|
37
|
+
|
38
|
+
The source code is hosted on Github: http://github.com/viseztrance/website_screenshot
|
39
|
+
|
40
|
+
To get WebsiteScreenshot from source:
|
41
|
+
|
42
|
+
git clone git@github.com:viseztrance/website_screenshot.git
|
43
|
+
|
44
|
+
|
45
|
+
== Acknowledgements and implementations
|
46
|
+
|
47
|
+
This code is the heart of a pet project I wrote in my spare time named {mywebsit.es}[http://mywebsit.es].
|
48
|
+
|
49
|
+
I also wrote a {web service}[http://github.com/viseztrance/website_screenshot_service] which I hope is a good starting point to anyone who implements this code.
|
50
|
+
|
51
|
+
|
52
|
+
== License
|
53
|
+
|
54
|
+
This package is licensed under the MIT license and/or the Creative
|
55
|
+
Commons Attribution-ShareAlike license.
|
data/Rakefile
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Rake tasks for building the documentation and for releasing or
# installing the gem described by website_screenshot.gemspec.

# Rake::RDocTask ("rake/rdoctask") was removed from Rake; RDoc ships the
# replacement as RDoc::Task. Prefer the modern task and only fall back to
# the legacy one on old toolchains.
begin
  require "rdoc/task"
  rdoc_task_class = RDoc::Task
rescue LoadError
  require "rake/rdoctask"
  rdoc_task_class = Rake::RDocTask
end

# The gemspec is the single source of truth for the name, version and
# rdoc options used by the tasks below.
spec = Gem::Specification.load(File.expand_path("website_screenshot.gemspec", File.dirname(__FILE__)))

# Create the documentation.
rdoc_task_class.new do |rdoc|
  rdoc.rdoc_files.include "README.rdoc", "lib/**/*.rb"
  rdoc.options = spec.rdoc_options
end

desc "Push new release to rubyforge and git tag"
task :push do
  sh "git push"
  puts "Tagging version #{spec.version} .."
  sh "git tag v#{spec.version}"
  sh "git push --tag"
  puts "Building and pushing gem .."
  sh "gem build #{spec.name}.gemspec"
  sh "gem push #{spec.name}-#{spec.version}.gem"
end

desc "Install #{spec.name} locally"
task :install do
  sh "gem build #{spec.name}.gemspec"
  sh "gem install #{spec.name}-#{spec.version}.gem"
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line front end for WebsiteScreenshot.
#
# Parses the options below into a hash and hands it to
# WebsiteScreenshot.new; the value returned by WebsiteScreenshot#get is
# printed to standard output.

require "optparse"
require "website_screenshot"

options = {}

ARGV.options do |opt|
  script_name = File.basename($0)

  opt.set_summary_indent(" ")
  opt.banner = "Usage: #{script_name} [options]"
  opt.define_head "Takes screenshots of webpages."

  opt.separator ""

  # NOTE: a hash element cannot be used as a block parameter
  # (`{ |options[:url]| }` only parsed on Ruby 1.8), so every handler
  # receives the parsed value and stores it explicitly.
  opt.on("-u", "--url=val", String,
         "Website url, eg. http://google.com") { |value| options[:url] = value }
  opt.on("-s", "--size=val", String,
         "Window size, eg. 1024x768") { |value| options[:size] = value }
  opt.on("-f", "--file=val", String,
         "Image file name for the saved output. Can also be a path.") { |value| options[:file_name] = value }
  opt.on("-v", "--verbose=val", String,
         "Display progress during loading.") { |value| options[:verbose] = value }
  opt.on("-t", "--timeout=val", Integer,
         "Timeout before killing the page.") { |value| options[:render_timeout] = value }

  opt.separator ""

  opt.on_tail("-h", "--help", "Show this help message.") { puts opt; exit }

  opt.parse!

  if options[:url].nil?
    puts "The url option is required (--url). -h for additional information."
  else
    screenshot = WebsiteScreenshot.new options
    puts screenshot.get
  end

end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
#
|
2
|
+
# = website_screenshot.rb - Takes screenshots of webpages
|
3
|
+
#
|
4
|
+
# Author:: Daniel Mircea daniel@viseztrance.com
|
5
|
+
# Copyright:: Copyright (c) 2011 Daniel Mircea
|
6
|
+
# License:: MIT and/or Creative Commons Attribution-ShareAlike
|
7
|
+
|
8
|
+
require "Qt4"
|
9
|
+
require "qtwebkit"
|
10
|
+
|
11
|
+
# Loads a web page in a Qt WebKit view and saves a screenshot of the
# rendered window once loading finishes or the render timeout expires.
class WebsiteScreenshot

  VERSION = Gem::Specification.load(File.expand_path("../website_screenshot.gemspec", File.dirname(__FILE__))).version.to_s

  # Load state written by the Qt signal handlers and read by the status
  # checking thread. It lives at class level, so #get resets it on entry.
  @@state = "waiting"
  @@progress = 0
  @@started_at = nil

  # Timeout before killing the page.
  attr_accessor :render_timeout

  attr_accessor :check_loading_status_interval #:nodoc:

  # Filename to save the file to.
  attr_accessor :file_name

  # Window size.
  attr_accessor :size

  # Page url.
  attr_accessor :url

  attr_accessor :verbose #:nodoc:

  # Instantiates a new object.
  # ==== Options:
  # [*url*] Website path. URL redirects are automatically resolved using +curl+.
  # [*file_name*] Name of the saved image. Defaults to output.png.
  # [*render_timeout*] Timeout before killing the page. Defaults at two minutes.
  # [*check_loading_status_interval*] Interval between page status checks.
  # [*size*] Window size the page is being rendered into. Defaults at 1360x768.
  # [*verbose*] Outputs page load progress.
  def initialize(args)
    self.render_timeout = args[:render_timeout] || 120
    self.check_loading_status_interval = args[:check_loading_status_interval] || 0.1
    self.file_name = args[:file_name] || "output.png"
    self.size = args[:size] || "1360x768"
    self.url = args[:url]
    self.verbose = args[:verbose] # default FALSE
  end

  # Renders the website and saves a screenshot.
  # ==== Returns:
  # * If the webpage began rendering, the load percentage. A screenshot is saved if the page has been +50%+ loaded or more.
  # * +false+ if for some reason the +url+ could not be opened, or the browser initialized.
  def get

    # The state is shared by every instance and every call; without this
    # reset a second run would immediately observe the previous run's
    # "finished" state.
    @@state = "waiting"
    @@progress = 0
    @@started_at = nil

    app = Qt::Application.new(ARGV)
    webview = Qt::WebView.new

    webview.connect(SIGNAL("loadStarted()")) do
      @@started_at = Time.now.to_i
    end

    webview.connect(SIGNAL("loadFinished(bool)")) do |result|
      if result
        @@state = "finished-success"
      else
        @@state = "finished-fail"
        @@progress = false
      end
      suspend_thread # Give it enough time to switch to the sentinel thread and avoid an empty exec loop.
    end

    webview.connect(SIGNAL("loadProgress(int)")) do |progress|
      puts "#{progress}%" if verbose
      @@progress = progress
      suspend_thread if has_reached_time_out?
    end

    # Enable flash, javascript and some other sensible browsing options.
    settings = webview.settings
    settings.setAttribute(Qt::WebSettings::PluginsEnabled, true)
    settings.setAttribute(Qt::WebSettings::JavascriptCanOpenWindows, false)
    settings.setAttribute(Qt::WebSettings::PrivateBrowsingEnabled, true)
    settings.setAttribute(Qt::WebSettings::JavascriptEnabled, true)

    # Hide the scrollbars.
    webview.page.mainFrame.setScrollBarPolicy(Qt::Horizontal, Qt::ScrollBarAlwaysOff)
    webview.page.mainFrame.setScrollBarPolicy(Qt::Vertical, Qt::ScrollBarAlwaysOff)

    webview.load(Qt::Url.new(url))
    webview.resize(size)
    webview.show

    # Runs the Qt event loop until the status thread below kills it.
    render_page_thread = Thread.new do
      app.exec
    end

    # Polls the load state and ends the run once the page has finished
    # loading (successfully or not) or the render timeout has expired.
    check_status_thread = Thread.new do
      loop do
        sleep check_loading_status_interval
        next unless @@state =~ /^finished/ || has_reached_time_out?

        # Save a screenshot if page finished loaded or it has timed out with 50%+ completion.
        # A failed load stores +false+ in @@progress, so it must be checked
        # for truthiness before being compared with a number.
        save(webview) if @@state == "finished-success" || (@@progress && @@progress >= 50)
        render_page_thread.kill
        break
      end
    end

    check_status_thread.join
    render_page_thread.join

    return @@progress

  end

  # Sets the window size.
  #
  # The geometry must have the following format: _widthxheight_.
  # ==== Returns:
  # A Qt::Size object.
  def size=(geometry)
    geometry_information = geometry.split("x")
    @size = Qt::Size.new(geometry_information.first.to_i, geometry_information.last.to_i)
  end

  # Sets the website url.
  #
  # Calls the operating systems +curl+ command to follow redirects and
  # stores the effective url it reports. The command is executed without a
  # shell, with the url passed as a single argument, so shell
  # metacharacters in +path+ cannot run additional commands. When +curl+
  # is not installed or reports nothing, the given path is stored as is.
  def url=(path)
    target = path.to_s
    command = ["curl", "--url", target,
               "-A", "Mozilla/5.0 (QtWebkit; WebsiteScreenshot)",
               "-L", "-o", "/dev/null", "-w", "%{url_effective}"]
    resolved = begin
      IO.popen(command) { |curl| curl.read }
    rescue Errno::ENOENT
      nil # curl is not available
    end
    @url = resolved.to_s.empty? ? target : resolved
  end

  private

  # Whether +render_timeout+ seconds have passed since the page started
  # loading. Always +false+ before the loadStarted signal has been seen.
  def has_reached_time_out?
    return false if @@started_at.nil?
    Time.now.to_i >= (@@started_at + render_timeout)
  end

  # Blocks the calling thread for 30 seconds.
  def suspend_thread
    sleep(30)
  end

  # Grabs the window of +webview+ and writes it to +file_name+, using the
  # file extension (without the dot) as the image format.
  def save(webview)
    sleep(5) # Wait a few seconds to allow some/any of the animations to take place
    pixmap = Qt::Pixmap.grabWindow(webview.window.winId)
    pixmap.save(file_name, File.extname(file_name).tr(".", ""))
  end

end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Gem specification for website_screenshot. The specification object is the
# value of the last expression, which is what Gem::Specification.load
# returns to its callers.
spec = Gem::Specification.new do |spec|
  spec.name = "website_screenshot"
  spec.version = "1.0"
  spec.summary = "Takes screenshots of webpages"
  spec.description = <<-EOF
Creates a new webkit window using the QT framework of a specified url and saves a screenshot when the page has finished loading.
  EOF

  spec.authors << "Daniel Mircea"
  spec.email = "daniel@viseztrance.com"
  spec.homepage = "http://github.com/viseztrance/website_screenshot"

  spec.files = Dir["{bin,lib,docs}/**/*"] + ["README.rdoc", "LICENSE", "Rakefile", "website_screenshot.gemspec"]
  spec.executables = "website-screenshot"

  # has_rdoc= is deprecated in RubyGems with no replacement; only call it
  # where the setter still exists.
  spec.has_rdoc = true if spec.respond_to?(:has_rdoc=)

  # The chain must continue across the line break: previously the
  # "--webcvs" line was a separate expression whose result was discarded,
  # so the option never reached rdoc_options.
  spec.rdoc_options << "--main" << "README.rdoc" << "--title" << "Website Screenshot" << "--line-numbers" <<
                       "--webcvs" << "http://github.com/viseztrance/website_screenshot"
  spec.extra_rdoc_files = ["README.rdoc", "LICENSE"]
end
|
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: website_screenshot
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 15
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
version: "1.0"
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Daniel Mircea
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-02-27 00:00:00 +02:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: |
|
22
|
+
Creates a new webkit window using the QT framework of a specified url and saves a screenshot when the page has finished loading.
|
23
|
+
|
24
|
+
email: daniel@viseztrance.com
|
25
|
+
executables:
|
26
|
+
- website-screenshot
|
27
|
+
extensions: []
|
28
|
+
|
29
|
+
extra_rdoc_files:
|
30
|
+
- README.rdoc
|
31
|
+
- LICENSE
|
32
|
+
files:
|
33
|
+
- bin/website-screenshot
|
34
|
+
- lib/website_screenshot.rb
|
35
|
+
- README.rdoc
|
36
|
+
- LICENSE
|
37
|
+
- Rakefile
|
38
|
+
- website_screenshot.gemspec
|
39
|
+
has_rdoc: true
|
40
|
+
homepage: http://github.com/viseztrance/website_screenshot
|
41
|
+
licenses: []
|
42
|
+
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options:
|
45
|
+
- --main
|
46
|
+
- README.rdoc
|
47
|
+
- --title
|
48
|
+
- Website Screenshot
|
49
|
+
- --line-numbers
|
50
|
+
require_paths:
|
51
|
+
- lib
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 3
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
version: "0"
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
hash: 3
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
version: "0"
|
70
|
+
requirements: []
|
71
|
+
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.3.7
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: Takes screenshots of webpages
|
77
|
+
test_files: []
|
78
|
+
|