ferrum_common 0.0.0 → 0.2.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ferrum_common.gemspec +4 -2
- data/lib/ferrum_common.rb +160 -20
- metadata +30 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e128a92c5b106f583d12cf8cc772b95637272dae11711e1095916e8485ccd9c5
|
4
|
+
data.tar.gz: c52cb59ea98be841986e6930e6a21456636491684cbe2d812f474dff996bb105
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aee22d614c6b08f217fcf0786083ef0b7a4aa0c154f79948916fe3433796c1255168df736ae6fdac82e3e0fd4d91ce309aaddadd0fc80e93087856e4cc355789
|
7
|
+
data.tar.gz: 6a2939158a8d3dc9da7938d63cad598fb005c88ef0307418ea9dd8a6f5eb3a8af34a1ad5c59257e23d7a99150a7c627520f9d26e1abe9c69ca4c3a0292f52546
|
data/ferrum_common.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "ferrum_common"
|
3
|
-
spec.version = "0.0.0"
|
3
|
+
spec.version = "0.2.0"
|
4
4
|
spec.summary = "[WIP] common useful extensions for ferrum or cuprite"
|
5
5
|
|
6
6
|
spec.author = "Victor Maslov aka Nakilon"
|
@@ -9,7 +9,9 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.metadata = {"source_code_uri" => "https://github.com/nakilon/ferrum_common"}
|
10
10
|
|
11
11
|
spec.add_dependency "ferrum"
|
12
|
-
spec.
|
12
|
+
spec.add_dependency "browser_reposition"
|
13
|
+
spec.add_dependency "nokogiri"
|
14
|
+
spec.required_ruby_version = ">=2.5" # why?
|
13
15
|
|
14
16
|
spec.files = %w{ LICENSE ferrum_common.gemspec lib/ferrum_common.rb }
|
15
17
|
end
|
data/lib/ferrum_common.rb
CHANGED
@@ -1,28 +1,168 @@
|
|
1
1
|
require "ferrum"
|
2
2
|
module FerrumCommon
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
3
|
+
|
4
|
+
module Common
|
5
|
+
|
6
|
+
private def mhtml browser, timeout, mtd, msg = nil
|
7
|
+
Timeout.timeout(timeout){ yield }
|
8
|
+
rescue Timeout::Error
|
9
|
+
browser.mhtml path: "temp.mhtml"
|
10
|
+
STDERR.puts "dumped to ./temp.mhtml"
|
11
|
+
$!.backtrace.reject!{ |_| _[/\/gems\/concurrent-ruby-/] }
|
12
|
+
$!.backtrace.reject!{ |_| _[/\/gems\/ferrum-/] }
|
13
|
+
raise Timeout::Error, "#{$!.to_s} after #{timeout} sec in #{mtd}#{" (#{msg.respond_to?(:call) ? msg.call : msg})" if msg}"
|
14
|
+
end
|
15
|
+
|
16
|
+
def until_true timeout, msg = nil
|
17
|
+
mhtml self, timeout, __method__, msg do
|
18
|
+
begin
|
19
|
+
yield
|
20
|
+
rescue Ferrum::NodeNotFoundError
|
21
|
+
redo
|
22
|
+
end or (sleep timeout*0.1; redo)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def until_one type, selector, timeout
|
27
|
+
t = nil
|
28
|
+
mhtml self, timeout, __method__, ->{ "expected exactly one node for #{type} #{selector.inspect}, got #{t ? t.size : "none"}" } do
|
29
|
+
t = begin
|
30
|
+
public_method(type).call selector
|
31
|
+
end
|
32
|
+
unless 1 == t.size
|
33
|
+
sleep timeout * 0.1
|
34
|
+
redo
|
14
35
|
end
|
15
|
-
rescue Timeout::Error
|
16
|
-
browser.mhtml path: "temp.mhtml"
|
17
|
-
$!.backtrace.reject!{ |_| _[/\/gems\/concurrent-ruby-/] }
|
18
|
-
$!.backtrace.reject!{ |_| _[/\/gems\/ferrum-/] }
|
19
|
-
raise Timeout::Error, "#{$!.to_s} in redo_until_true #{" (#{msg})" if msg}"
|
20
36
|
end
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
37
|
+
t.first
|
38
|
+
end
|
39
|
+
|
40
|
+
def abort msg_or_cause
|
41
|
+
# puts (msg_or_cause.respond_to?(:backtrace) ? msg_or_cause : Thread.current).backtrace
|
42
|
+
puts (msg_or_cause.respond_to?(:full_message) ? msg_or_cause.full_message : Thread.current.backtrace)
|
43
|
+
mhtml path: "temp.mhtml"
|
44
|
+
STDERR.puts "dumped to ./temp.mhtml"
|
45
|
+
Kernel.abort msg_or_cause.to_s
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
49
|
+
Ferrum::Page.include Common
|
50
|
+
Ferrum::Frame.include Common
|
51
|
+
|
52
|
+
if "darwin" == Gem::Platform.local.os
|
53
|
+
require "browser_reposition"
|
54
|
+
Ferrum::Browser.include Common, BrowserReposition
|
55
|
+
def self.new **_
|
56
|
+
Ferrum::Browser.new(**_).tap(&:reposition)
|
57
|
+
end
|
58
|
+
else
|
59
|
+
Ferrum::Browser.include Common
|
60
|
+
def self.new **_
|
61
|
+
Ferrum::Browser.new **_
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# https://datatracker.ietf.org/doc/html/rfc2557
|
66
|
+
# https://en.wikipedia.org/wiki/Quoted-printable
|
67
|
+
# require "strscan"
|
68
|
+
require "nokogiri" # Oga crashes on vk charset
|
69
|
+
def self.process_mhtml mht
|
70
|
+
scanner = ::StringScanner.new mht
|
71
|
+
fail scanner.peek(100).inspect unless scanner.scan(/\AFrom: <Saved by Blink>\r
|
72
|
+
Snapshot-Content-Location: \S+\r
|
73
|
+
Subject:(?: \S+\r\n)+Date: [A-Z][a-z][a-z], \d\d? [A-Z][a-z][a-z] 20\d\d \d\d:\d\d:\d\d -0000\r
|
74
|
+
MIME-Version: 1\.0\r
|
75
|
+
Content-Type: multipart\/related;\r
|
76
|
+
\ttype="text\/html";\r
|
77
|
+
\tboundary="(----MultipartBoundary--[a-zA-Z0-9]{42}----)"\r\n\r\n\r\n--\1/)
|
78
|
+
delimeter = scanner[1]
|
79
|
+
fail unless scanner.charpos == prev = scanner.pos
|
80
|
+
reps = []
|
81
|
+
while s = scanner.search_full(::Regexp.new(delimeter), true, true)
|
82
|
+
doc = s[0...-delimeter.size-4]
|
83
|
+
case doc
|
84
|
+
when /\A\r\nContent-Type: text\/html\r
|
85
|
+
Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
|
86
|
+
Content-Transfer-Encoding: quoted-printable\r
|
87
|
+
Content-Location: chrome-error:\/\/chromewebdata\/\r\n\r\n/,
|
88
|
+
/\A\r\nContent-Type: text\/html\r
|
89
|
+
Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
|
90
|
+
Content-Transfer-Encoding: quoted-printable\r\n\r\n/
|
91
|
+
puts "trash #{$'.size}"
|
92
|
+
reps.push [prev-delimeter.size-2, scanner.pos-delimeter.size-4, "", ""]
|
93
|
+
when /\A\r\nContent-Type: text\/html\r
|
94
|
+
Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
|
95
|
+
Content-Transfer-Encoding: quoted-printable\r
|
96
|
+
Content-Location: \S+\r\n\r\n/
|
97
|
+
puts "html #{$'.size}"
|
98
|
+
header = $&
|
99
|
+
t = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
|
100
|
+
puts "unpacked #{t.size}"
|
101
|
+
html = ::Nokogiri::HTML t#.force_encoding "utf-8"
|
102
|
+
|
103
|
+
puts ".to_s.size #{html.to_s.size}"
|
104
|
+
|
105
|
+
html.xpath("//*[not(*)]").group_by(&:name).
|
106
|
+
map{ |_, g| [_, g.map(&:to_s).map(&:size).reduce(:+)] }.
|
107
|
+
sort_by(&:last).reverse.take(5).each &method(:p)
|
108
|
+
|
109
|
+
if block_given?
|
110
|
+
yield html
|
111
|
+
puts "yielded"
|
112
|
+
puts "yield #{html.to_s.size}"
|
113
|
+
end
|
114
|
+
|
115
|
+
reps.push [prev, scanner.pos-delimeter.size-4, header, html.to_s, true, :html]
|
116
|
+
when /\A\r\nContent-Type: text\/css\r
|
117
|
+
Content-Transfer-Encoding: quoted-printable\r
|
118
|
+
Content-Location: \S+\r\n\r\n/
|
119
|
+
puts "css > #{$'.size}"
|
120
|
+
header = $&
|
121
|
+
css = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
|
122
|
+
css.gsub!(/[\r\n]+/, "\n")
|
123
|
+
|
124
|
+
puts "css < #{css.size}"
|
125
|
+
reps.push [prev, scanner.pos-delimeter.size-4, header, css, true, :css]
|
126
|
+
|
127
|
+
when /\A\r\nContent-Type: image\/(webp|png|gif|jpeg)\r
|
128
|
+
Content-Transfer-Encoding: base64\r
|
129
|
+
Content-Location: \S+\r\n\r\n/
|
130
|
+
puts "#{$1} #{$'.size}"
|
131
|
+
when /\A\r\nContent-Type: image\/svg\+xml\r
|
132
|
+
Content-Transfer-Encoding: quoted-printable\r
|
133
|
+
Content-Location: \S+\r\n\r\n/
|
134
|
+
puts "svg #{$'.size}"
|
135
|
+
else
|
136
|
+
puts doc[0..300]
|
137
|
+
fail
|
25
138
|
end
|
139
|
+
fail unless scanner.charpos == prev = scanner.pos
|
140
|
+
end
|
141
|
+
|
142
|
+
p is = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :html == type }.compact
|
143
|
+
p cs = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :css == type }.compact
|
144
|
+
cs.each_cons(2){ |i,j| fail unless i+1==j }
|
145
|
+
fail unless is == [cs[0]-1]
|
146
|
+
File.write "temp.htm", reps[is[0]][3]
|
147
|
+
puts "css > #{File.size "temp.css"}"
|
148
|
+
File.open("temp.css", "w"){ |f| cs.each{ |i| f.puts reps[i][3] } }
|
149
|
+
system "uncss temp.htm -s temp.css -o out.css"
|
150
|
+
puts "css < #{File.size "out.css"}"
|
151
|
+
reps[cs[0]][1] = reps[cs[-1]][1]
|
152
|
+
reps[cs[0]+1..cs[-1]] = []
|
153
|
+
reps[cs[0]][3] = File.read "out.css"
|
154
|
+
|
155
|
+
reps.reverse_each do |from, to, header, str, qp|
|
156
|
+
str = qp ?
|
157
|
+
header + str.gsub("=", "=3D").
|
158
|
+
b.gsub(/[\x80-\xFF]/n){ |_| "=%02X" % _.ord }.
|
159
|
+
gsub(/.{73}[^=][^=](?=.)/, "\\0=\r\n") :
|
160
|
+
header + str.gsub("\n", "\r\n")
|
161
|
+
p [str.size, "to - from = #{to - from}"]
|
162
|
+
mht[from...to] = str
|
26
163
|
end
|
164
|
+
p ::File.write "temp.mht", mht
|
165
|
+
puts "OK"
|
27
166
|
end
|
167
|
+
|
28
168
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ferrum_common
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Maslov aka Nakilon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ferrum
|
@@ -24,6 +24,34 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: browser_reposition
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
27
55
|
description:
|
28
56
|
email: nakilon@gmail.com
|
29
57
|
executables: []
|