chupa-text-decomposer-webkit 1.0.4 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/chupa-text-decomposer-webkit.gemspec +1 -1
- data/doc/text/news.md +6 -0
- data/lib/chupa-text-decomposer-webkit/screenshoter.rb +216 -0
- data/lib/chupa-text/decomposers/webkit.rb +69 -139
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b28aa0772e09cadcde4ed6a92ebe681019aac5b2
|
4
|
+
data.tar.gz: 12f606f199ef7593ff54aec5f11674bc90b077bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1bda3a888d92deab02ef9e51d3a8a2ef97bbc4f6d9e39b85c3310ffbe51e9f6b46cb01510cbeef793129203f7f43d0431f16e52ce04abb58f94aa22c5f3b9c6
|
7
|
+
data.tar.gz: e39a3397f022c2df03284f88657443f14e5940720619f2ff17b44586e0d6400d7417c349e53db3939081c54f8c3fa4024a0ac2a6769ed3fd8b3f9d33d4e90a69
|
data/chupa-text-decomposer-webkit.gemspec
CHANGED
@@ -22,7 +22,7 @@ end
|
|
22
22
|
|
23
23
|
Gem::Specification.new do |spec|
|
24
24
|
spec.name = "chupa-text-decomposer-webkit"
|
25
|
-
spec.version = "1.0.4"
|
25
|
+
spec.version = "1.0.5"
|
26
26
|
spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-webkit"
|
27
27
|
spec.authors = ["Kouhei Sutou"]
|
28
28
|
spec.email = ["kou@clear-code.com"]
|
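data/doc/text/news.md
CHANGED
(diff body for this file was not captured in this extract; the file list reports +6 -0)
data/lib/chupa-text-decomposer-webkit/screenshoter.rb
ADDED
@@ -0,0 +1,216 @@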
|
|
1
|
+
# Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
require "webkit2-gtk"
|
18
|
+
|
19
|
+
module ChupaTextDecomposerWebKit
|
20
|
+
class Screenshoter
|
21
|
+
def initialize(logger)
|
22
|
+
@logger = logger
|
23
|
+
@view_context = create_view_context
|
24
|
+
@view = create_view
|
25
|
+
@window = create_window
|
26
|
+
@main_loop = GLib::MainLoop.new(nil, false)
|
27
|
+
@timeout_second = compute_timeout_second
|
28
|
+
@screenshot_cancellable = nil
|
29
|
+
@on_snapshot = nil
|
30
|
+
end
|
31
|
+
|
32
|
+
def run(body, uri, output_path, width, height)
|
33
|
+
@on_snapshot = lambda do |snapshot_surface|
|
34
|
+
scaled_surface = scale_snapshot(snapshot_surface, width, height)
|
35
|
+
scaled_surface.write_to_png(output_path)
|
36
|
+
end
|
37
|
+
|
38
|
+
begin
|
39
|
+
timeout do
|
40
|
+
debug do
|
41
|
+
"#{log_tag}[load][HTML] #{uri}"
|
42
|
+
end
|
43
|
+
@view.load_html(body, uri)
|
44
|
+
@main_loop.run
|
45
|
+
end
|
46
|
+
ensure
|
47
|
+
@on_snapshot = nil
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
def create_view_context
|
53
|
+
context = WebKit2Gtk::WebContext.new(ephemeral: true)
|
54
|
+
http_proxy = ENV["http_proxy"]
|
55
|
+
https_proxy = ENV["https_proxy"]
|
56
|
+
ftp_proxy = ENV["ftp_proxy"]
|
57
|
+
if http_proxy or https_proxy or ftp_proxy
|
58
|
+
proxy_settings = WebKit2Gtk::NetworkProxySettings.new
|
59
|
+
if http_proxy
|
60
|
+
proxy_settings.add_proxy_for_scheme("http", http_proxy)
|
61
|
+
end
|
62
|
+
if https_proxy
|
63
|
+
proxy_settings.add_proxy_for_scheme("https", https_proxy)
|
64
|
+
end
|
65
|
+
if ftp_proxy
|
66
|
+
proxy_settings.add_proxy_for_scheme("ftp", ftp_proxy)
|
67
|
+
end
|
68
|
+
context.set_network_proxy_settings(:custom, proxy_settings)
|
69
|
+
end
|
70
|
+
context
|
71
|
+
end
|
72
|
+
|
73
|
+
def create_view
|
74
|
+
view = WebKit2Gtk::WebView.new(context: @view_context)
|
75
|
+
|
76
|
+
view.signal_connect("load-changed") do |_, load_event|
|
77
|
+
debug do
|
78
|
+
"#{log_tag}[load][#{load_event.nick}] #{view.uri}"
|
79
|
+
end
|
80
|
+
|
81
|
+
case load_event
|
82
|
+
when WebKit2Gtk::LoadEvent::FINISHED
|
83
|
+
debug do
|
84
|
+
"#{log_tag}[screenshot][start] #{view.uri}"
|
85
|
+
end
|
86
|
+
cancel_screenshot
|
87
|
+
@screenshot_cancellable = Gio::Cancellable.new
|
88
|
+
view.get_snapshot(:full_document,
|
89
|
+
:none,
|
90
|
+
@screenshot_cancellable) do |_, result|
|
91
|
+
@screenshot_cancellable = nil
|
92
|
+
@main_loop.quit
|
93
|
+
begin
|
94
|
+
snapshot_surface = view.get_snapshot_finish(result)
|
95
|
+
rescue
|
96
|
+
error do
|
97
|
+
message = "failed to create snapshot: #{view.uri}: "
|
98
|
+
message << "#{$!.class}: #{$!.message}"
|
99
|
+
"#{log_tag}[screenshot][failed] #{message}"
|
100
|
+
end
|
101
|
+
else
|
102
|
+
debug do
|
103
|
+
size = "#{snapshot_surface.width}x#{snapshot_surface.height}"
|
104
|
+
"#{log_tag}[screenshot][finish] #{view.uri}: #{size}"
|
105
|
+
end
|
106
|
+
unless snapshot_surface.width.zero?
|
107
|
+
@on_snapshot.call(snapshot_surface) if @on_snapshot
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
view.signal_connect("load-failed") do |_, _, failed_uri, error|
|
115
|
+
cancel_screenshot
|
116
|
+
@main_loop.quit
|
117
|
+
error do
|
118
|
+
message = "failed to load URI: #{failed_uri}: "
|
119
|
+
message << "#{error.class}(#{error.code}): #{error.message}"
|
120
|
+
"#{log_tag}[load][failed] #{message}"
|
121
|
+
end
|
122
|
+
true
|
123
|
+
end
|
124
|
+
|
125
|
+
view
|
126
|
+
end
|
127
|
+
|
128
|
+
def scale_snapshot(snapshot_surface, width, height)
|
129
|
+
scaled_surface = Cairo::ImageSurface.new(:argb32, width, height)
|
130
|
+
|
131
|
+
context = Cairo::Context.new(scaled_surface)
|
132
|
+
context.set_source_color(:white)
|
133
|
+
context.paint
|
134
|
+
|
135
|
+
ratio = width.to_f / snapshot_surface.width
|
136
|
+
context.scale(ratio, ratio)
|
137
|
+
context.set_source(snapshot_surface)
|
138
|
+
context.paint
|
139
|
+
|
140
|
+
scaled_surface
|
141
|
+
end
|
142
|
+
|
143
|
+
def create_window
|
144
|
+
window = Gtk::OffscreenWindow.new
|
145
|
+
window.set_default_size(800, 600)
|
146
|
+
window.add(@view)
|
147
|
+
window.show_all
|
148
|
+
window
|
149
|
+
end
|
150
|
+
|
151
|
+
def cancel_screenshot
|
152
|
+
return if @screenshot_cancellable.nil?
|
153
|
+
|
154
|
+
debug do
|
155
|
+
"#{log_tag}[snapshot][cancel] cancel screenshot: #{@view.uri}"
|
156
|
+
end
|
157
|
+
@screenshot_cancellable.cancel
|
158
|
+
@screenshot_cancellable = nil
|
159
|
+
end
|
160
|
+
|
161
|
+
def timeout
|
162
|
+
timeout_id = GLib::Timeout.add_seconds(@timeout_second) do
|
163
|
+
timeout_id = nil
|
164
|
+
error do
|
165
|
+
message = "timeout to load URI: #{@timeout_second}s: #{@view.uri}"
|
166
|
+
message << ": loading" if @view.loading?
|
167
|
+
"#{log_tag}[load][timeout] #{message}"
|
168
|
+
end
|
169
|
+
cancel_screenshot
|
170
|
+
if @view.loading?
|
171
|
+
close_id = @view.signal_connect("close") do
|
172
|
+
@view.signal_handler_disconnect(close_id)
|
173
|
+
@main_loop.quit
|
174
|
+
error do
|
175
|
+
"#{log_tag}[load][closed] #{@view.uri}"
|
176
|
+
end
|
177
|
+
end
|
178
|
+
@view.try_close
|
179
|
+
else
|
180
|
+
@main_loop.quit
|
181
|
+
end
|
182
|
+
GLib::Source::REMOVE
|
183
|
+
end
|
184
|
+
|
185
|
+
begin
|
186
|
+
yield
|
187
|
+
ensure
|
188
|
+
GLib::Source.remove(timeout_id) if timeout_id
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
def compute_timeout_second
|
193
|
+
default_timeout = 5
|
194
|
+
timeout_string =
|
195
|
+
ENV["CHUPA_TEXT_DECOMPOSER_WEBKIT_TIMEOUT"] || default_timeout.to_s
|
196
|
+
begin
|
197
|
+
Integer(timeout_string)
|
198
|
+
rescue ArgumentError
|
199
|
+
default_timeout
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
private
|
204
|
+
def log_tag
|
205
|
+
"[decomposer][webkit]"
|
206
|
+
end
|
207
|
+
|
208
|
+
def debug(*args, &block)
|
209
|
+
@logger.debug(*args, &block)
|
210
|
+
end
|
211
|
+
|
212
|
+
def error(*args, &block)
|
213
|
+
@logger.error(*args, &block)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
data/lib/chupa-text/decomposers/webkit.rb
CHANGED
@@ -14,7 +14,8 @@
|
|
14
14
|
# License along with this library; if not, write to the Free Software
|
15
15
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
16
|
|
17
|
-
require "webkit2-gtk"
|
17
|
+
require "English"
|
18
|
+
require "rbconfig"
|
18
19
|
|
19
20
|
module ChupaText
|
20
21
|
module Decomposers
|
@@ -57,161 +58,90 @@ module ChupaText
|
|
57
58
|
false
|
58
59
|
end
|
59
60
|
|
61
|
+
IN_PROCESS = ENV["CHUPA_TEXT_DECOMPOSER_WEBKIT_IN_PROCESS"] == "yes"
|
62
|
+
if IN_PROCESS
|
63
|
+
require "chupa-text-decomposer-webkit/screenshoter"
|
64
|
+
end
|
65
|
+
|
60
66
|
def decompose(data)
|
61
|
-
|
62
|
-
|
67
|
+
body = data.source.body
|
68
|
+
uri = data.source.uri.to_s
|
69
|
+
output = Tempfile.new(["chupa-text-decomposer-webkit", ".png"])
|
70
|
+
width, height = data.expected_screenshot_size
|
71
|
+
if IN_PROCESS
|
72
|
+
screenshoter = ChupaTextDecomposerWebKit::Screenshoter.new(logger)
|
73
|
+
screenshoter.run(body, uri, output.path, width, height)
|
74
|
+
else
|
75
|
+
screenshoter = ExternalScreenshoter.new
|
76
|
+
screenshoter.run(data.source.path, uri, output.path, width, height)
|
77
|
+
end
|
78
|
+
unless File.size(output.path).zero?
|
79
|
+
png = output.read
|
80
|
+
data.screenshot = Screenshot.new("image/png",
|
81
|
+
[png].pack("m*"),
|
82
|
+
"base64")
|
83
|
+
end
|
63
84
|
data[AVAILABLE_ATTRIBUTE_NAME] = !data.screenshot.nil?
|
64
85
|
yield(data)
|
65
86
|
end
|
66
87
|
|
67
|
-
class
|
88
|
+
class ExternalScreenshoter
|
68
89
|
include Loggable
|
69
90
|
include LogTag
|
70
91
|
|
71
|
-
def initialize
|
72
|
-
@
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
view = WebKit2Gtk::WebView.new(context: @@view_context)
|
80
|
-
window = Gtk::OffscreenWindow.new
|
81
|
-
window.set_default_size(800, 600)
|
82
|
-
window.add(view)
|
83
|
-
window.show_all
|
84
|
-
|
85
|
-
setup_callbacks(view)
|
86
|
-
|
87
|
-
timeout(view) do
|
88
|
-
debug do
|
89
|
-
"#{log_tag}[load][HTML] #{@data.uri}"
|
90
|
-
end
|
91
|
-
view.load_html(@data.source.body, @data.source.uri.to_s)
|
92
|
-
@main_loop.run
|
93
|
-
end
|
94
|
-
|
95
|
-
window.destroy
|
96
|
-
end
|
97
|
-
|
98
|
-
private
|
99
|
-
def create_view_context
|
100
|
-
context = WebKit2Gtk::WebContext.new(ephemeral: true)
|
101
|
-
http_proxy = ENV["http_proxy"]
|
102
|
-
https_proxy = ENV["https_proxy"]
|
103
|
-
ftp_proxy = ENV["ftp_proxy"]
|
104
|
-
if http_proxy or https_proxy or ftp_proxy
|
105
|
-
proxy_settings = WebKit2Gtk::NetworkProxySettings.new
|
106
|
-
if http_proxy
|
107
|
-
proxy_settings.add_proxy_for_scheme("http", http_proxy)
|
108
|
-
end
|
109
|
-
if https_proxy
|
110
|
-
proxy_settings.add_proxy_for_scheme("https", https_proxy)
|
111
|
-
end
|
112
|
-
if ftp_proxy
|
113
|
-
proxy_settings.add_proxy_for_scheme("ftp", ftp_proxy)
|
114
|
-
end
|
115
|
-
context.set_network_proxy_settings(:custom, proxy_settings)
|
116
|
-
end
|
117
|
-
context
|
92
|
+
def initialize
|
93
|
+
@screenshoter = File.join(__dir__,
|
94
|
+
"..",
|
95
|
+
"..",
|
96
|
+
"..",
|
97
|
+
"bin",
|
98
|
+
"chupa-text-decomposer-webkit-screenshoter")
|
99
|
+
@command = ExternalCommand.new(RbConfig.ruby)
|
118
100
|
end
|
119
101
|
|
120
|
-
def
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
debug do
|
135
|
-
size = "#{snapshot_surface.width}x#{snapshot_surface.height}"
|
136
|
-
"#{log_tag}[screenshot][finish] #{view.uri}: #{size}"
|
137
|
-
end
|
138
|
-
unless snapshot_surface.width.zero?
|
139
|
-
png = convert_snapshot_surface_to_png(snapshot_surface)
|
140
|
-
@data.screenshot = Screenshot.new("image/png",
|
141
|
-
[png].pack("m*"),
|
142
|
-
"base64")
|
143
|
-
end
|
102
|
+
def run(html_path, uri, output_path, width, height)
|
103
|
+
output_read, output_write = IO.pipe
|
104
|
+
error_output = Tempfile.new("chupa-text-decomposer-webkit-error")
|
105
|
+
output_reader = Thread.new do
|
106
|
+
loop do
|
107
|
+
IO.select([output_read])
|
108
|
+
line = output_read.gets
|
109
|
+
break if line.nil?
|
110
|
+
|
111
|
+
case line.chomp
|
112
|
+
when /\Adebug: /
|
113
|
+
debug($POSTMATCH)
|
114
|
+
when /\Aerror: /
|
115
|
+
error($POSTMATCH)
|
144
116
|
end
|
145
117
|
end
|
146
118
|
end
|
147
|
-
|
148
|
-
|
119
|
+
successed = @command.run(@screenshoter,
|
120
|
+
html_path,
|
121
|
+
uri,
|
122
|
+
output_path,
|
123
|
+
width.to_s,
|
124
|
+
height.to_s,
|
125
|
+
{
|
126
|
+
:spawn_options => {
|
127
|
+
:out => output_write,
|
128
|
+
:err => error_output.path,
|
129
|
+
},
|
130
|
+
})
|
131
|
+
output_write.close
|
132
|
+
output_reader.join
|
133
|
+
|
134
|
+
unless successed
|
149
135
|
error do
|
150
|
-
message = "failed to load URI: #{failed_uri}: "
|
151
|
-
message << "#{error.class}(#{error.code}): #{error.message}"
|
152
|
-
"#{log_tag}[load][failed] #{message}"
|
136
|
+
message = "failed to external screenshoter: #{uri}: "
|
137
|
+
message << "#{@command.path} #{@screenshoter}"
|
138
|
+
"#{log_tag}[external-screenshoter][run][failed] #{message}"
|
153
139
|
end
|
154
|
-
true
|
155
140
|
end
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
screenshot_width, screenshot_height = @data.expected_screenshot_size
|
160
|
-
|
161
|
-
screenshot_surface = Cairo::ImageSurface.new(:argb32,
|
162
|
-
screenshot_width,
|
163
|
-
screenshot_height)
|
164
|
-
context = Cairo::Context.new(screenshot_surface)
|
165
|
-
context.set_source_color(:white)
|
166
|
-
context.paint
|
167
|
-
|
168
|
-
ratio = screenshot_width.to_f / snapshot_surface.width
|
169
|
-
context.scale(ratio, ratio)
|
170
|
-
context.set_source(snapshot_surface)
|
171
|
-
context.paint
|
172
|
-
|
173
|
-
png = StringIO.new
|
174
|
-
screenshot_surface.write_to_png(png)
|
175
|
-
png.string
|
176
|
-
end
|
177
|
-
|
178
|
-
def timeout(view)
|
179
|
-
timeout_id = GLib::Timeout.add_seconds(@timeout_second) do
|
180
|
-
timeout_id = nil
|
181
|
-
error do
|
182
|
-
message = "timeout to load URI: #{@timeout_second}s: #{view.uri}"
|
183
|
-
message << ": loading" if view.loading?
|
184
|
-
"#{log_tag}[load][timeout] #{message}"
|
141
|
+
unless error_output.size.zero?
|
142
|
+
error_output.each_line do |line|
|
143
|
+
error(line)
|
185
144
|
end
|
186
|
-
if view.loading?
|
187
|
-
view.signal_connect("close") do
|
188
|
-
@main_loop.quit
|
189
|
-
error do
|
190
|
-
"#{log_tag}[load][closed] #{view.uri}"
|
191
|
-
end
|
192
|
-
end
|
193
|
-
view.try_close
|
194
|
-
else
|
195
|
-
@main_loop.quit
|
196
|
-
end
|
197
|
-
GLib::Source::REMOVE
|
198
|
-
end
|
199
|
-
|
200
|
-
begin
|
201
|
-
yield
|
202
|
-
ensure
|
203
|
-
GLib::Source.remove(timeout_id) if timeout_id
|
204
|
-
end
|
205
|
-
end
|
206
|
-
|
207
|
-
def compute_timeout_second
|
208
|
-
default_timeout = 5
|
209
|
-
timeout_string =
|
210
|
-
ENV["CHUPA_TEXT_DECOMPOSER_WEBKIT_TIMEOUT"] || default_timeout.to_s
|
211
|
-
begin
|
212
|
-
Integer(timeout_string)
|
213
|
-
rescue ArgumentError
|
214
|
-
default_timeout
|
215
145
|
end
|
216
146
|
end
|
217
147
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text-decomposer-webkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.4
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
@@ -126,6 +126,7 @@ files:
|
|
126
126
|
- Rakefile
|
127
127
|
- chupa-text-decomposer-webkit.gemspec
|
128
128
|
- doc/text/news.md
|
129
|
+
- lib/chupa-text-decomposer-webkit/screenshoter.rb
|
129
130
|
- lib/chupa-text/decomposers/webkit.rb
|
130
131
|
- test/run-test.rb
|
131
132
|
- test/test-webkit.rb
|