chupa-text-decomposer-webkit 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/chupa-text-decomposer-webkit.gemspec +1 -1
- data/doc/text/news.md +6 -0
- data/lib/chupa-text-decomposer-webkit/screenshoter.rb +216 -0
- data/lib/chupa-text/decomposers/webkit.rb +69 -139
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b28aa0772e09cadcde4ed6a92ebe681019aac5b2
|
4
|
+
data.tar.gz: 12f606f199ef7593ff54aec5f11674bc90b077bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1bda3a888d92deab02ef9e51d3a8a2ef97bbc4f6d9e39b85c3310ffbe51e9f6b46cb01510cbeef793129203f7f43d0431f16e52ce04abb58f94aa22c5f3b9c6
|
7
|
+
data.tar.gz: e39a3397f022c2df03284f88657443f14e5940720619f2ff17b44586e0d6400d7417c349e53db3939081c54f8c3fa4024a0ac2a6769ed3fd8b3f9d33d4e90a69
|
data/chupa-text-decomposer-webkit.gemspec
CHANGED
@@ -22,7 +22,7 @@ end
|
|
22
22
|
|
23
23
|
Gem::Specification.new do |spec|
|
24
24
|
spec.name = "chupa-text-decomposer-webkit"
|
25
|
-
spec.version = "1.0.4"
|
25
|
+
spec.version = "1.0.5"
|
26
26
|
spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-webkit"
|
27
27
|
spec.authors = ["Kouhei Sutou"]
|
28
28
|
spec.email = ["kou@clear-code.com"]
|
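data/doc/text/news.md
CHANGED
(The +6 -0 hunk for data/doc/text/news.md is not included in this extract.)
data/lib/chupa-text-decomposer-webkit/screenshoter.rb
ADDED
@@ -0,0 +1,216 @@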
|
|
1
|
+
# Copyright (C) 2017 Kouhei Sutou <kou@clear-code.com>
|
2
|
+
#
|
3
|
+
# This library is free software; you can redistribute it and/or
|
4
|
+
# modify it under the terms of the GNU Lesser General Public
|
5
|
+
# License as published by the Free Software Foundation; either
|
6
|
+
# version 2.1 of the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This library is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
# Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU Lesser General Public
|
14
|
+
# License along with this library; if not, write to the Free Software
|
15
|
+
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
|
+
|
17
|
+
require "webkit2-gtk"
|
18
|
+
|
19
|
+
module ChupaTextDecomposerWebKit
|
20
|
+
class Screenshoter
|
21
|
+
def initialize(logger)
|
22
|
+
@logger = logger
|
23
|
+
@view_context = create_view_context
|
24
|
+
@view = create_view
|
25
|
+
@window = create_window
|
26
|
+
@main_loop = GLib::MainLoop.new(nil, false)
|
27
|
+
@timeout_second = compute_timeout_second
|
28
|
+
@screenshot_cancellable = nil
|
29
|
+
@on_snapshot = nil
|
30
|
+
end
|
31
|
+
|
32
|
+
def run(body, uri, output_path, width, height)
|
33
|
+
@on_snapshot = lambda do |snapshot_surface|
|
34
|
+
scaled_surface = scale_snapshot(snapshot_surface, width, height)
|
35
|
+
scaled_surface.write_to_png(output_path)
|
36
|
+
end
|
37
|
+
|
38
|
+
begin
|
39
|
+
timeout do
|
40
|
+
debug do
|
41
|
+
"#{log_tag}[load][HTML] #{uri}"
|
42
|
+
end
|
43
|
+
@view.load_html(body, uri)
|
44
|
+
@main_loop.run
|
45
|
+
end
|
46
|
+
ensure
|
47
|
+
@on_snapshot = nil
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
private
|
52
|
+
def create_view_context
|
53
|
+
context = WebKit2Gtk::WebContext.new(ephemeral: true)
|
54
|
+
http_proxy = ENV["http_proxy"]
|
55
|
+
https_proxy = ENV["https_proxy"]
|
56
|
+
ftp_proxy = ENV["ftp_proxy"]
|
57
|
+
if http_proxy or https_proxy or ftp_proxy
|
58
|
+
proxy_settings = WebKit2Gtk::NetworkProxySettings.new
|
59
|
+
if http_proxy
|
60
|
+
proxy_settings.add_proxy_for_scheme("http", http_proxy)
|
61
|
+
end
|
62
|
+
if https_proxy
|
63
|
+
proxy_settings.add_proxy_for_scheme("https", https_proxy)
|
64
|
+
end
|
65
|
+
if ftp_proxy
|
66
|
+
proxy_settings.add_proxy_for_scheme("ftp", ftp_proxy)
|
67
|
+
end
|
68
|
+
context.set_network_proxy_settings(:custom, proxy_settings)
|
69
|
+
end
|
70
|
+
context
|
71
|
+
end
|
72
|
+
|
73
|
+
def create_view
|
74
|
+
view = WebKit2Gtk::WebView.new(context: @view_context)
|
75
|
+
|
76
|
+
view.signal_connect("load-changed") do |_, load_event|
|
77
|
+
debug do
|
78
|
+
"#{log_tag}[load][#{load_event.nick}] #{view.uri}"
|
79
|
+
end
|
80
|
+
|
81
|
+
case load_event
|
82
|
+
when WebKit2Gtk::LoadEvent::FINISHED
|
83
|
+
debug do
|
84
|
+
"#{log_tag}[screenshot][start] #{view.uri}"
|
85
|
+
end
|
86
|
+
cancel_screenshot
|
87
|
+
@screenshot_cancellable = Gio::Cancellable.new
|
88
|
+
view.get_snapshot(:full_document,
|
89
|
+
:none,
|
90
|
+
@screenshot_cancellable) do |_, result|
|
91
|
+
@screenshot_cancellable = nil
|
92
|
+
@main_loop.quit
|
93
|
+
begin
|
94
|
+
snapshot_surface = view.get_snapshot_finish(result)
|
95
|
+
rescue
|
96
|
+
error do
|
97
|
+
message = "failed to create snapshot: #{view.uri}: "
|
98
|
+
message << "#{$!.class}: #{$!.message}"
|
99
|
+
"#{log_tag}[screenshot][failed] #{message}"
|
100
|
+
end
|
101
|
+
else
|
102
|
+
debug do
|
103
|
+
size = "#{snapshot_surface.width}x#{snapshot_surface.height}"
|
104
|
+
"#{log_tag}[screenshot][finish] #{view.uri}: #{size}"
|
105
|
+
end
|
106
|
+
unless snapshot_surface.width.zero?
|
107
|
+
@on_snapshot.call(snapshot_surface) if @on_snapshot
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
view.signal_connect("load-failed") do |_, _, failed_uri, error|
|
115
|
+
cancel_screenshot
|
116
|
+
@main_loop.quit
|
117
|
+
error do
|
118
|
+
message = "failed to load URI: #{failed_uri}: "
|
119
|
+
message << "#{error.class}(#{error.code}): #{error.message}"
|
120
|
+
"#{log_tag}[load][failed] #{message}"
|
121
|
+
end
|
122
|
+
true
|
123
|
+
end
|
124
|
+
|
125
|
+
view
|
126
|
+
end
|
127
|
+
|
128
|
+
def scale_snapshot(snapshot_surface, width, height)
|
129
|
+
scaled_surface = Cairo::ImageSurface.new(:argb32, width, height)
|
130
|
+
|
131
|
+
context = Cairo::Context.new(scaled_surface)
|
132
|
+
context.set_source_color(:white)
|
133
|
+
context.paint
|
134
|
+
|
135
|
+
ratio = width.to_f / snapshot_surface.width
|
136
|
+
context.scale(ratio, ratio)
|
137
|
+
context.set_source(snapshot_surface)
|
138
|
+
context.paint
|
139
|
+
|
140
|
+
scaled_surface
|
141
|
+
end
|
142
|
+
|
143
|
+
def create_window
|
144
|
+
window = Gtk::OffscreenWindow.new
|
145
|
+
window.set_default_size(800, 600)
|
146
|
+
window.add(@view)
|
147
|
+
window.show_all
|
148
|
+
window
|
149
|
+
end
|
150
|
+
|
151
|
+
def cancel_screenshot
|
152
|
+
return if @screenshot_cancellable.nil?
|
153
|
+
|
154
|
+
debug do
|
155
|
+
"#{log_tag}[snapshot][cancel] cancel screenshot: #{@view.uri}"
|
156
|
+
end
|
157
|
+
@screenshot_cancellable.cancel
|
158
|
+
@screenshot_cancellable = nil
|
159
|
+
end
|
160
|
+
|
161
|
+
def timeout
|
162
|
+
timeout_id = GLib::Timeout.add_seconds(@timeout_second) do
|
163
|
+
timeout_id = nil
|
164
|
+
error do
|
165
|
+
message = "timeout to load URI: #{@timeout_second}s: #{@view.uri}"
|
166
|
+
message << ": loading" if @view.loading?
|
167
|
+
"#{log_tag}[load][timeout] #{message}"
|
168
|
+
end
|
169
|
+
cancel_screenshot
|
170
|
+
if @view.loading?
|
171
|
+
close_id = @view.signal_connect("close") do
|
172
|
+
@view.signal_handler_disconnect(close_id)
|
173
|
+
@main_loop.quit
|
174
|
+
error do
|
175
|
+
"#{log_tag}[load][closed] #{@view.uri}"
|
176
|
+
end
|
177
|
+
end
|
178
|
+
@view.try_close
|
179
|
+
else
|
180
|
+
@main_loop.quit
|
181
|
+
end
|
182
|
+
GLib::Source::REMOVE
|
183
|
+
end
|
184
|
+
|
185
|
+
begin
|
186
|
+
yield
|
187
|
+
ensure
|
188
|
+
GLib::Source.remove(timeout_id) if timeout_id
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
def compute_timeout_second
|
193
|
+
default_timeout = 5
|
194
|
+
timeout_string =
|
195
|
+
ENV["CHUPA_TEXT_DECOMPOSER_WEBKIT_TIMEOUT"] || default_timeout.to_s
|
196
|
+
begin
|
197
|
+
Integer(timeout_string)
|
198
|
+
rescue ArgumentError
|
199
|
+
default_timeout
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
private
|
204
|
+
def log_tag
|
205
|
+
"[decomposer][webkit]"
|
206
|
+
end
|
207
|
+
|
208
|
+
def debug(*args, &block)
|
209
|
+
@logger.debug(*args, &block)
|
210
|
+
end
|
211
|
+
|
212
|
+
def error(*args, &block)
|
213
|
+
@logger.error(*args, &block)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
data/lib/chupa-text/decomposers/webkit.rb
CHANGED
@@ -14,7 +14,8 @@
|
|
14
14
|
# License along with this library; if not, write to the Free Software
|
15
15
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
16
16
|
|
17
|
-
require "webkit2-gtk"
|
17
|
+
require "English"
|
18
|
+
require "rbconfig"
|
18
19
|
|
19
20
|
module ChupaText
|
20
21
|
module Decomposers
|
@@ -57,161 +58,90 @@ module ChupaText
|
|
57
58
|
false
|
58
59
|
end
|
59
60
|
|
61
|
+
IN_PROCESS = ENV["CHUPA_TEXT_DECOMPOSER_WEBKIT_IN_PROCESS"] == "yes"
|
62
|
+
if IN_PROCESS
|
63
|
+
require "chupa-text-decomposer-webkit/screenshoter"
|
64
|
+
end
|
65
|
+
|
60
66
|
def decompose(data)
|
61
|
-
|
62
|
-
|
67
|
+
body = data.source.body
|
68
|
+
uri = data.source.uri.to_s
|
69
|
+
output = Tempfile.new(["chupa-text-decomposer-webkit", ".png"])
|
70
|
+
width, height = data.expected_screenshot_size
|
71
|
+
if IN_PROCESS
|
72
|
+
screenshoter = ChupaTextDecomposerWebKit::Screenshoter.new(logger)
|
73
|
+
screenshoter.run(body, uri, output.path, width, height)
|
74
|
+
else
|
75
|
+
screenshoter = ExternalScreenshoter.new
|
76
|
+
screenshoter.run(data.source.path, uri, output.path, width, height)
|
77
|
+
end
|
78
|
+
unless File.size(output.path).zero?
|
79
|
+
png = output.read
|
80
|
+
data.screenshot = Screenshot.new("image/png",
|
81
|
+
[png].pack("m*"),
|
82
|
+
"base64")
|
83
|
+
end
|
63
84
|
data[AVAILABLE_ATTRIBUTE_NAME] = !data.screenshot.nil?
|
64
85
|
yield(data)
|
65
86
|
end
|
66
87
|
|
67
|
-
class
|
88
|
+
class ExternalScreenshoter
|
68
89
|
include Loggable
|
69
90
|
include LogTag
|
70
91
|
|
71
|
-
def initialize
|
72
|
-
@
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
view = WebKit2Gtk::WebView.new(context: @@view_context)
|
80
|
-
window = Gtk::OffscreenWindow.new
|
81
|
-
window.set_default_size(800, 600)
|
82
|
-
window.add(view)
|
83
|
-
window.show_all
|
84
|
-
|
85
|
-
setup_callbacks(view)
|
86
|
-
|
87
|
-
timeout(view) do
|
88
|
-
debug do
|
89
|
-
"#{log_tag}[load][HTML] #{@data.uri}"
|
90
|
-
end
|
91
|
-
view.load_html(@data.source.body, @data.source.uri.to_s)
|
92
|
-
@main_loop.run
|
93
|
-
end
|
94
|
-
|
95
|
-
window.destroy
|
96
|
-
end
|
97
|
-
|
98
|
-
private
|
99
|
-
def create_view_context
|
100
|
-
context = WebKit2Gtk::WebContext.new(ephemeral: true)
|
101
|
-
http_proxy = ENV["http_proxy"]
|
102
|
-
https_proxy = ENV["https_proxy"]
|
103
|
-
ftp_proxy = ENV["ftp_proxy"]
|
104
|
-
if http_proxy or https_proxy or ftp_proxy
|
105
|
-
proxy_settings = WebKit2Gtk::NetworkProxySettings.new
|
106
|
-
if http_proxy
|
107
|
-
proxy_settings.add_proxy_for_scheme("http", http_proxy)
|
108
|
-
end
|
109
|
-
if https_proxy
|
110
|
-
proxy_settings.add_proxy_for_scheme("https", https_proxy)
|
111
|
-
end
|
112
|
-
if ftp_proxy
|
113
|
-
proxy_settings.add_proxy_for_scheme("ftp", ftp_proxy)
|
114
|
-
end
|
115
|
-
context.set_network_proxy_settings(:custom, proxy_settings)
|
116
|
-
end
|
117
|
-
context
|
92
|
+
def initialize
|
93
|
+
@screenshoter = File.join(__dir__,
|
94
|
+
"..",
|
95
|
+
"..",
|
96
|
+
"..",
|
97
|
+
"bin",
|
98
|
+
"chupa-text-decomposer-webkit-screenshoter")
|
99
|
+
@command = ExternalCommand.new(RbConfig.ruby)
|
118
100
|
end
|
119
101
|
|
120
|
-
def
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
debug do
|
135
|
-
size = "#{snapshot_surface.width}x#{snapshot_surface.height}"
|
136
|
-
"#{log_tag}[screenshot][finish] #{view.uri}: #{size}"
|
137
|
-
end
|
138
|
-
unless snapshot_surface.width.zero?
|
139
|
-
png = convert_snapshot_surface_to_png(snapshot_surface)
|
140
|
-
@data.screenshot = Screenshot.new("image/png",
|
141
|
-
[png].pack("m*"),
|
142
|
-
"base64")
|
143
|
-
end
|
102
|
+
def run(html_path, uri, output_path, width, height)
|
103
|
+
output_read, output_write = IO.pipe
|
104
|
+
error_output = Tempfile.new("chupa-text-decomposer-webkit-error")
|
105
|
+
output_reader = Thread.new do
|
106
|
+
loop do
|
107
|
+
IO.select([output_read])
|
108
|
+
line = output_read.gets
|
109
|
+
break if line.nil?
|
110
|
+
|
111
|
+
case line.chomp
|
112
|
+
when /\Adebug: /
|
113
|
+
debug($POSTMATCH)
|
114
|
+
when /\Aerror: /
|
115
|
+
error($POSTMATCH)
|
144
116
|
end
|
145
117
|
end
|
146
118
|
end
|
147
|
-
|
148
|
-
|
119
|
+
successed = @command.run(@screenshoter,
|
120
|
+
html_path,
|
121
|
+
uri,
|
122
|
+
output_path,
|
123
|
+
width.to_s,
|
124
|
+
height.to_s,
|
125
|
+
{
|
126
|
+
:spawn_options => {
|
127
|
+
:out => output_write,
|
128
|
+
:err => error_output.path,
|
129
|
+
},
|
130
|
+
})
|
131
|
+
output_write.close
|
132
|
+
output_reader.join
|
133
|
+
|
134
|
+
unless successed
|
149
135
|
error do
|
150
|
-
message = "failed to
|
151
|
-
message << "#{
|
152
|
-
"#{log_tag}[
|
136
|
+
message = "failed to external screenshoter: #{uri}: "
|
137
|
+
message << "#{@command.path} #{@screenshoter}"
|
138
|
+
"#{log_tag}[external-screenshoter][run][failed] #{message}"
|
153
139
|
end
|
154
|
-
true
|
155
140
|
end
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
screenshot_width, screenshot_height = @data.expected_screenshot_size
|
160
|
-
|
161
|
-
screenshot_surface = Cairo::ImageSurface.new(:argb32,
|
162
|
-
screenshot_width,
|
163
|
-
screenshot_height)
|
164
|
-
context = Cairo::Context.new(screenshot_surface)
|
165
|
-
context.set_source_color(:white)
|
166
|
-
context.paint
|
167
|
-
|
168
|
-
ratio = screenshot_width.to_f / snapshot_surface.width
|
169
|
-
context.scale(ratio, ratio)
|
170
|
-
context.set_source(snapshot_surface)
|
171
|
-
context.paint
|
172
|
-
|
173
|
-
png = StringIO.new
|
174
|
-
screenshot_surface.write_to_png(png)
|
175
|
-
png.string
|
176
|
-
end
|
177
|
-
|
178
|
-
def timeout(view)
|
179
|
-
timeout_id = GLib::Timeout.add_seconds(@timeout_second) do
|
180
|
-
timeout_id = nil
|
181
|
-
error do
|
182
|
-
message = "timeout to load URI: #{@timeout_second}s: #{view.uri}"
|
183
|
-
message << ": loading" if view.loading?
|
184
|
-
"#{log_tag}[load][timeout] #{message}"
|
141
|
+
unless error_output.size.zero?
|
142
|
+
error_output.each_line do |line|
|
143
|
+
error(line)
|
185
144
|
end
|
186
|
-
if view.loading?
|
187
|
-
view.signal_connect("close") do
|
188
|
-
@main_loop.quit
|
189
|
-
error do
|
190
|
-
"#{log_tag}[load][closed] #{view.uri}"
|
191
|
-
end
|
192
|
-
end
|
193
|
-
view.try_close
|
194
|
-
else
|
195
|
-
@main_loop.quit
|
196
|
-
end
|
197
|
-
GLib::Source::REMOVE
|
198
|
-
end
|
199
|
-
|
200
|
-
begin
|
201
|
-
yield
|
202
|
-
ensure
|
203
|
-
GLib::Source.remove(timeout_id) if timeout_id
|
204
|
-
end
|
205
|
-
end
|
206
|
-
|
207
|
-
def compute_timeout_second
|
208
|
-
default_timeout = 5
|
209
|
-
timeout_string =
|
210
|
-
ENV["CHUPA_TEXT_DECOMPOSER_WEBKIT_TIMEOUT"] || default_timeout.to_s
|
211
|
-
begin
|
212
|
-
Integer(timeout_string)
|
213
|
-
rescue ArgumentError
|
214
|
-
default_timeout
|
215
145
|
end
|
216
146
|
end
|
217
147
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text-decomposer-webkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.4
|
4
|
+
version: 1.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
@@ -126,6 +126,7 @@ files:
|
|
126
126
|
- Rakefile
|
127
127
|
- chupa-text-decomposer-webkit.gemspec
|
128
128
|
- doc/text/news.md
|
129
|
+
- lib/chupa-text-decomposer-webkit/screenshoter.rb
|
129
130
|
- lib/chupa-text/decomposers/webkit.rb
|
130
131
|
- test/run-test.rb
|
131
132
|
- test/test-webkit.rb
|