ferrum_common 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ferrum_common.gemspec +1 -1
- data/lib/ferrum_common.rb +26 -24
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f66888a248e7235fdccb3510c7370d708c2052868d828c4553c41b6b0d3411e
|
4
|
+
data.tar.gz: e57b7c549817f6294703ee108d2b54d400d34735fc8cde5a8f63f27546aa9562
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b87cac7f5c32144f8955c436f37d1b98b4562a771fd12607bbfee6524c983288b0eba127474fa12ded126bb5abf0673740eda70dc2e0144a7fb8c2c0f24b04f4
|
7
|
+
data.tar.gz: 7731364485b2bf8be92c6d4e4aaa4cbd9465d1cc9386cd0cbfa9d5e24f26f33065f3c68ebbcb55cea6c30fce320b50a719a62b3cbddfce69282952f39ebad6e7
|
data/ferrum_common.gemspec
CHANGED
data/lib/ferrum_common.rb
CHANGED
@@ -3,7 +3,7 @@ module FerrumCommon
|
|
3
3
|
|
4
4
|
module Common
|
5
5
|
|
6
|
-
private def
|
6
|
+
private def yield_with_timeout browser, timeout, mtd, msg = nil
|
7
7
|
Timeout.timeout(timeout){ yield }
|
8
8
|
rescue Timeout::Error
|
9
9
|
browser.mhtml path: "temp.mhtml"
|
@@ -14,7 +14,7 @@ module FerrumCommon
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def until_true timeout, msg = nil
|
17
|
-
|
17
|
+
yield_with_timeout self, timeout, __method__, msg do
|
18
18
|
begin
|
19
19
|
yield
|
20
20
|
rescue Ferrum::NodeNotFoundError
|
@@ -25,7 +25,7 @@ module FerrumCommon
|
|
25
25
|
|
26
26
|
def until_one type, selector, timeout
|
27
27
|
t = nil
|
28
|
-
|
28
|
+
yield_with_timeout self, timeout, __method__, ->{ "expected exactly one node for #{type} #{selector.inspect}, got #{t ? t.size : "none"}" } do
|
29
29
|
t = begin
|
30
30
|
public_method(type).call selector
|
31
31
|
end
|
@@ -66,11 +66,11 @@ module FerrumCommon
|
|
66
66
|
# https://en.wikipedia.org/wiki/Quoted-printable
|
67
67
|
# require "strscan"
|
68
68
|
require "nokogiri" # Oga crashes on vk charset
|
69
|
-
def self.process_mhtml
|
70
|
-
scanner = ::StringScanner.new
|
71
|
-
fail scanner.peek(
|
69
|
+
def self.process_mhtml
|
70
|
+
scanner = ::StringScanner.new(mht = ARGF.read)
|
71
|
+
fail scanner.peek(400).inspect unless scanner.scan(/\AFrom: <Saved by Blink>\r
|
72
72
|
Snapshot-Content-Location: \S+\r
|
73
|
-
Subject:(?: \S
|
73
|
+
Subject:(?: \S.*\r\n)+Date: [A-Z][a-z][a-z], \d\d? [A-Z][a-z][a-z] 20\d\d \d\d:\d\d:\d\d -0000\r
|
74
74
|
MIME-Version: 1\.0\r
|
75
75
|
Content-Type: multipart\/related;\r
|
76
76
|
\ttype="text\/html";\r
|
@@ -88,19 +88,19 @@ Content-Location: chrome-error:\/\/chromewebdata\/\r\n\r\n/,
|
|
88
88
|
/\A\r\nContent-Type: text\/html\r
|
89
89
|
Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
|
90
90
|
Content-Transfer-Encoding: quoted-printable\r\n\r\n/
|
91
|
-
puts "trash #{$'.size}"
|
91
|
+
STDERR.puts "trash #{$'.size}"
|
92
92
|
reps.push [prev-delimeter.size-2, scanner.pos-delimeter.size-4, "", ""]
|
93
93
|
when /\A\r\nContent-Type: text\/html\r
|
94
94
|
Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
|
95
95
|
Content-Transfer-Encoding: quoted-printable\r
|
96
96
|
Content-Location: \S+\r\n\r\n/
|
97
|
-
puts "html #{$'.size}"
|
97
|
+
STDERR.puts "html #{$'.size}"
|
98
98
|
header = $&
|
99
99
|
t = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
|
100
|
-
puts "unpacked #{t.size}"
|
100
|
+
STDERR.puts "unpacked #{t.size}"
|
101
101
|
html = ::Nokogiri::HTML t#.force_encoding "utf-8"
|
102
102
|
|
103
|
-
puts ".to_s.size #{html.to_s.size}"
|
103
|
+
STDERR.puts ".to_s.size #{html.to_s.size}"
|
104
104
|
|
105
105
|
html.xpath("//*[not(*)]").group_by(&:name).
|
106
106
|
map{ |_, g| [_, g.map(&:to_s).map(&:size).reduce(:+)] }.
|
@@ -108,46 +108,48 @@ Content-Location: \S+\r\n\r\n/
|
|
108
108
|
|
109
109
|
if block_given?
|
110
110
|
yield html
|
111
|
-
puts "yielded"
|
112
|
-
puts "yield #{html.to_s.size}"
|
111
|
+
STDERR.puts "yielded"
|
112
|
+
STDERR.puts "yield #{html.to_s.size}"
|
113
113
|
end
|
114
114
|
|
115
115
|
reps.push [prev, scanner.pos-delimeter.size-4, header, html.to_s, true, :html]
|
116
116
|
when /\A\r\nContent-Type: text\/css\r
|
117
117
|
Content-Transfer-Encoding: quoted-printable\r
|
118
118
|
Content-Location: \S+\r\n\r\n/
|
119
|
-
puts "css > #{$'.size}"
|
119
|
+
STDERR.puts "css > #{$'.size}"
|
120
120
|
header = $&
|
121
121
|
css = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
|
122
122
|
css.gsub!(/[\r\n]+/, "\n")
|
123
123
|
|
124
|
-
puts "css < #{css.size}"
|
124
|
+
STDERR.puts "css < #{css.size}"
|
125
125
|
reps.push [prev, scanner.pos-delimeter.size-4, header, css, true, :css]
|
126
126
|
|
127
127
|
when /\A\r\nContent-Type: image\/(webp|png|gif|jpeg)\r
|
128
128
|
Content-Transfer-Encoding: base64\r
|
129
129
|
Content-Location: \S+\r\n\r\n/
|
130
|
-
puts "#{$1} #{$'.size}"
|
130
|
+
STDERR.puts "#{$1} #{$'.size}"
|
131
131
|
when /\A\r\nContent-Type: image\/svg\+xml\r
|
132
132
|
Content-Transfer-Encoding: quoted-printable\r
|
133
133
|
Content-Location: \S+\r\n\r\n/
|
134
|
-
puts "svg #{$'.size}"
|
134
|
+
STDERR.puts "svg #{$'.size}"
|
135
135
|
else
|
136
|
-
puts doc[0..300]
|
136
|
+
STDERR.puts doc[0..300]
|
137
137
|
fail
|
138
138
|
end
|
139
139
|
fail unless scanner.charpos == prev = scanner.pos
|
140
140
|
end
|
141
141
|
|
142
|
-
|
143
|
-
|
142
|
+
is = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :html == type }.compact
|
143
|
+
STDERR.puts is.inspect
|
144
|
+
cs = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :css == type }.compact
|
145
|
+
STDERR.puts cs.inspect
|
144
146
|
cs.each_cons(2){ |i,j| fail unless i+1==j }
|
145
147
|
fail unless is == [cs[0]-1]
|
146
148
|
File.write "temp.htm", reps[is[0]][3]
|
147
149
|
puts "css > #{File.size "temp.css"}"
|
148
150
|
File.open("temp.css", "w"){ |f| cs.each{ |i| f.puts reps[i][3] } }
|
149
151
|
system "uncss temp.htm -s temp.css -o out.css"
|
150
|
-
puts "css < #{File.size "out.css"}"
|
152
|
+
STDERR.puts "css < #{File.size "out.css"}"
|
151
153
|
reps[cs[0]][1] = reps[cs[-1]][1]
|
152
154
|
reps[cs[0]+1..cs[-1]] = []
|
153
155
|
reps[cs[0]][3] = File.read "out.css"
|
@@ -158,11 +160,11 @@ Content-Location: \S+\r\n\r\n/
|
|
158
160
|
b.gsub(/[\x80-\xFF]/n){ |_| "=%02X" % _.ord }.
|
159
161
|
gsub(/.{73}[^=][^=](?=.)/, "\\0=\r\n") :
|
160
162
|
header + str.gsub("\n", "\r\n")
|
161
|
-
|
163
|
+
STDERR.puts [str.size, "to - from = #{to - from}"].inspect
|
162
164
|
mht[from...to] = str
|
163
165
|
end
|
164
|
-
|
165
|
-
puts "OK"
|
166
|
+
puts mht
|
167
|
+
STDERR.puts "OK"
|
166
168
|
end
|
167
169
|
|
168
170
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ferrum_common
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Maslov aka Nakilon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-09-
|
11
|
+
date: 2023-09-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ferrum
|