ferrum_common 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ferrum_common.gemspec +2 -1
- data/lib/ferrum_common.rb +119 -11
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e128a92c5b106f583d12cf8cc772b95637272dae11711e1095916e8485ccd9c5
|
4
|
+
data.tar.gz: c52cb59ea98be841986e6930e6a21456636491684cbe2d812f474dff996bb105
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aee22d614c6b08f217fcf0786083ef0b7a4aa0c154f79948916fe3433796c1255168df736ae6fdac82e3e0fd4d91ce309aaddadd0fc80e93087856e4cc355789
|
7
|
+
data.tar.gz: 6a2939158a8d3dc9da7938d63cad598fb005c88ef0307418ea9dd8a6f5eb3a8af34a1ad5c59257e23d7a99150a7c627520f9d26e1abe9c69ca4c3a0292f52546
|
data/ferrum_common.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
2
|
spec.name = "ferrum_common"
|
3
|
-
spec.version = "0.
|
3
|
+
spec.version = "0.2.0"
|
4
4
|
spec.summary = "[WIP] common useful extensions for ferrum or cuprite"
|
5
5
|
|
6
6
|
spec.author = "Victor Maslov aka Nakilon"
|
@@ -10,6 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
|
11
11
|
spec.add_dependency "ferrum"
|
12
12
|
spec.add_dependency "browser_reposition"
|
13
|
+
spec.add_dependency "nokogiri"
|
13
14
|
spec.required_ruby_version = ">=2.5" # why?
|
14
15
|
|
15
16
|
spec.files = %w{ LICENSE ferrum_common.gemspec lib/ferrum_common.rb }
|
data/lib/ferrum_common.rb
CHANGED
@@ -3,17 +3,18 @@ module FerrumCommon
|
|
3
3
|
|
4
4
|
module Common
|
5
5
|
|
6
|
-
def
|
6
|
+
private def mhtml browser, timeout, mtd, msg = nil
|
7
7
|
Timeout.timeout(timeout){ yield }
|
8
8
|
rescue Timeout::Error
|
9
9
|
browser.mhtml path: "temp.mhtml"
|
10
|
+
STDERR.puts "dumped to ./temp.mhtml"
|
10
11
|
$!.backtrace.reject!{ |_| _[/\/gems\/concurrent-ruby-/] }
|
11
12
|
$!.backtrace.reject!{ |_| _[/\/gems\/ferrum-/] }
|
12
13
|
raise Timeout::Error, "#{$!.to_s} after #{timeout} sec in #{mtd}#{" (#{msg.respond_to?(:call) ? msg.call : msg})" if msg}"
|
13
14
|
end
|
14
15
|
|
15
16
|
def until_true timeout, msg = nil
|
16
|
-
|
17
|
+
mhtml self, timeout, __method__, msg do
|
17
18
|
begin
|
18
19
|
yield
|
19
20
|
rescue Ferrum::NodeNotFoundError
|
@@ -24,12 +25,9 @@ module FerrumCommon
|
|
24
25
|
|
25
26
|
def until_one type, selector, timeout
|
26
27
|
t = nil
|
27
|
-
|
28
|
+
mhtml self, timeout, __method__, ->{ "expected exactly one node for #{type} #{selector.inspect}, got #{t ? t.size : "none"}" } do
|
28
29
|
t = begin
|
29
30
|
public_method(type).call selector
|
30
|
-
rescue Ferrum::NodeNotFoundError
|
31
|
-
sleep timeout * 0.1
|
32
|
-
redo
|
33
31
|
end
|
34
32
|
unless 1 == t.size
|
35
33
|
sleep timeout * 0.1
|
@@ -43,7 +41,7 @@ module FerrumCommon
|
|
43
41
|
# puts (msg_or_cause.respond_to?(:backtrace) ? msg_or_cause : Thread.current).backtrace
|
44
42
|
puts (msg_or_cause.respond_to?(:full_message) ? msg_or_cause.full_message : Thread.current.backtrace)
|
45
43
|
mhtml path: "temp.mhtml"
|
46
|
-
puts "dumped to ./temp.mhtml"
|
44
|
+
STDERR.puts "dumped to ./temp.mhtml"
|
47
45
|
Kernel.abort msg_or_cause.to_s
|
48
46
|
end
|
49
47
|
|
@@ -51,10 +49,120 @@ module FerrumCommon
|
|
51
49
|
Ferrum::Page.include Common
|
52
50
|
Ferrum::Frame.include Common
|
53
51
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
52
|
+
if "darwin" == Gem::Platform.local.os
|
53
|
+
require "browser_reposition"
|
54
|
+
Ferrum::Browser.include Common, BrowserReposition
|
55
|
+
def self.new **_
|
56
|
+
Ferrum::Browser.new(**_).tap(&:reposition)
|
57
|
+
end
|
58
|
+
else
|
59
|
+
Ferrum::Browser.include Common
|
60
|
+
def self.new **_
|
61
|
+
Ferrum::Browser.new **_
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# https://datatracker.ietf.org/doc/html/rfc2557
|
66
|
+
# https://en.wikipedia.org/wiki/Quoted-printable
|
67
|
+
# require "strscan"
|
68
|
+
require "nokogiri" # Oga crashes on vk charset
|
69
|
+
def self.process_mhtml mht
|
70
|
+
scanner = ::StringScanner.new mht
|
71
|
+
fail scanner.peek(100).inspect unless scanner.scan(/\AFrom: <Saved by Blink>\r
|
72
|
+
Snapshot-Content-Location: \S+\r
|
73
|
+
Subject:(?: \S+\r\n)+Date: [A-Z][a-z][a-z], \d\d? [A-Z][a-z][a-z] 20\d\d \d\d:\d\d:\d\d -0000\r
|
74
|
+
MIME-Version: 1\.0\r
|
75
|
+
Content-Type: multipart\/related;\r
|
76
|
+
\ttype="text\/html";\r
|
77
|
+
\tboundary="(----MultipartBoundary--[a-zA-Z0-9]{42}----)"\r\n\r\n\r\n--\1/)
|
78
|
+
delimeter = scanner[1]
|
79
|
+
fail unless scanner.charpos == prev = scanner.pos
|
80
|
+
reps = []
|
81
|
+
while s = scanner.search_full(::Regexp.new(delimeter), true, true)
|
82
|
+
doc = s[0...-delimeter.size-4]
|
83
|
+
case doc
|
84
|
+
when /\A\r\nContent-Type: text\/html\r
|
85
|
+
Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
|
86
|
+
Content-Transfer-Encoding: quoted-printable\r
|
87
|
+
Content-Location: chrome-error:\/\/chromewebdata\/\r\n\r\n/,
|
88
|
+
/\A\r\nContent-Type: text\/html\r
|
89
|
+
Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
|
90
|
+
Content-Transfer-Encoding: quoted-printable\r\n\r\n/
|
91
|
+
puts "trash #{$'.size}"
|
92
|
+
reps.push [prev-delimeter.size-2, scanner.pos-delimeter.size-4, "", ""]
|
93
|
+
when /\A\r\nContent-Type: text\/html\r
|
94
|
+
Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
|
95
|
+
Content-Transfer-Encoding: quoted-printable\r
|
96
|
+
Content-Location: \S+\r\n\r\n/
|
97
|
+
puts "html #{$'.size}"
|
98
|
+
header = $&
|
99
|
+
t = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
|
100
|
+
puts "unpacked #{t.size}"
|
101
|
+
html = ::Nokogiri::HTML t#.force_encoding "utf-8"
|
102
|
+
|
103
|
+
puts ".to_s.size #{html.to_s.size}"
|
104
|
+
|
105
|
+
html.xpath("//*[not(*)]").group_by(&:name).
|
106
|
+
map{ |_, g| [_, g.map(&:to_s).map(&:size).reduce(:+)] }.
|
107
|
+
sort_by(&:last).reverse.take(5).each &method(:p)
|
108
|
+
|
109
|
+
if block_given?
|
110
|
+
yield html
|
111
|
+
puts "yielded"
|
112
|
+
puts "yield #{html.to_s.size}"
|
113
|
+
end
|
114
|
+
|
115
|
+
reps.push [prev, scanner.pos-delimeter.size-4, header, html.to_s, true, :html]
|
116
|
+
when /\A\r\nContent-Type: text\/css\r
|
117
|
+
Content-Transfer-Encoding: quoted-printable\r
|
118
|
+
Content-Location: \S+\r\n\r\n/
|
119
|
+
puts "css > #{$'.size}"
|
120
|
+
header = $&
|
121
|
+
css = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
|
122
|
+
css.gsub!(/[\r\n]+/, "\n")
|
123
|
+
|
124
|
+
puts "css < #{css.size}"
|
125
|
+
reps.push [prev, scanner.pos-delimeter.size-4, header, css, true, :css]
|
126
|
+
|
127
|
+
when /\A\r\nContent-Type: image\/(webp|png|gif|jpeg)\r
|
128
|
+
Content-Transfer-Encoding: base64\r
|
129
|
+
Content-Location: \S+\r\n\r\n/
|
130
|
+
puts "#{$1} #{$'.size}"
|
131
|
+
when /\A\r\nContent-Type: image\/svg\+xml\r
|
132
|
+
Content-Transfer-Encoding: quoted-printable\r
|
133
|
+
Content-Location: \S+\r\n\r\n/
|
134
|
+
puts "svg #{$'.size}"
|
135
|
+
else
|
136
|
+
puts doc[0..300]
|
137
|
+
fail
|
138
|
+
end
|
139
|
+
fail unless scanner.charpos == prev = scanner.pos
|
140
|
+
end
|
141
|
+
|
142
|
+
p is = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :html == type }.compact
|
143
|
+
p cs = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :css == type }.compact
|
144
|
+
cs.each_cons(2){ |i,j| fail unless i+1==j }
|
145
|
+
fail unless is == [cs[0]-1]
|
146
|
+
File.write "temp.htm", reps[is[0]][3]
|
147
|
+
puts "css > #{File.size "temp.css"}"
|
148
|
+
File.open("temp.css", "w"){ |f| cs.each{ |i| f.puts reps[i][3] } }
|
149
|
+
system "uncss temp.htm -s temp.css -o out.css"
|
150
|
+
puts "css < #{File.size "out.css"}"
|
151
|
+
reps[cs[0]][1] = reps[cs[-1]][1]
|
152
|
+
reps[cs[0]+1..cs[-1]] = []
|
153
|
+
reps[cs[0]][3] = File.read "out.css"
|
154
|
+
|
155
|
+
reps.reverse_each do |from, to, header, str, qp|
|
156
|
+
str = qp ?
|
157
|
+
header + str.gsub("=", "=3D").
|
158
|
+
b.gsub(/[\x80-\xFF]/n){ |_| "=%02X" % _.ord }.
|
159
|
+
gsub(/.{73}[^=][^=](?=.)/, "\\0=\r\n") :
|
160
|
+
header + str.gsub("\n", "\r\n")
|
161
|
+
p [str.size, "to - from = #{to - from}"]
|
162
|
+
mht[from...to] = str
|
163
|
+
end
|
164
|
+
p ::File.write "temp.mht", mht
|
165
|
+
puts "OK"
|
58
166
|
end
|
59
167
|
|
60
168
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ferrum_common
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Maslov aka Nakilon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ferrum
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: nokogiri
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
description:
|
42
56
|
email: nakilon@gmail.com
|
43
57
|
executables: []
|