ferrum_common 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ferrum_common.gemspec +1 -1
- data/lib/ferrum_common.rb +26 -24
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f66888a248e7235fdccb3510c7370d708c2052868d828c4553c41b6b0d3411e
|
4
|
+
data.tar.gz: e57b7c549817f6294703ee108d2b54d400d34735fc8cde5a8f63f27546aa9562
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b87cac7f5c32144f8955c436f37d1b98b4562a771fd12607bbfee6524c983288b0eba127474fa12ded126bb5abf0673740eda70dc2e0144a7fb8c2c0f24b04f4
|
7
|
+
data.tar.gz: 7731364485b2bf8be92c6d4e4aaa4cbd9465d1cc9386cd0cbfa9d5e24f26f33065f3c68ebbcb55cea6c30fce320b50a719a62b3cbddfce69282952f39ebad6e7
|
data/ferrum_common.gemspec
CHANGED
data/lib/ferrum_common.rb
CHANGED
@@ -3,7 +3,7 @@ module FerrumCommon
|
|
3
3
|
|
4
4
|
module Common
|
5
5
|
|
6
|
-
private def
|
6
|
+
private def yield_with_timeout browser, timeout, mtd, msg = nil
|
7
7
|
Timeout.timeout(timeout){ yield }
|
8
8
|
rescue Timeout::Error
|
9
9
|
browser.mhtml path: "temp.mhtml"
|
@@ -14,7 +14,7 @@ module FerrumCommon
|
|
14
14
|
end
|
15
15
|
|
16
16
|
def until_true timeout, msg = nil
|
17
|
-
|
17
|
+
yield_with_timeout self, timeout, __method__, msg do
|
18
18
|
begin
|
19
19
|
yield
|
20
20
|
rescue Ferrum::NodeNotFoundError
|
@@ -25,7 +25,7 @@ module FerrumCommon
|
|
25
25
|
|
26
26
|
def until_one type, selector, timeout
|
27
27
|
t = nil
|
28
|
-
|
28
|
+
yield_with_timeout self, timeout, __method__, ->{ "expected exactly one node for #{type} #{selector.inspect}, got #{t ? t.size : "none"}" } do
|
29
29
|
t = begin
|
30
30
|
public_method(type).call selector
|
31
31
|
end
|
@@ -66,11 +66,11 @@ module FerrumCommon
|
|
66
66
|
# https://en.wikipedia.org/wiki/Quoted-printable
|
67
67
|
# require "strscan"
|
68
68
|
require "nokogiri" # Oga crashes on vk charset
|
69
|
-
def self.process_mhtml
|
70
|
-
scanner = ::StringScanner.new
|
71
|
-
fail scanner.peek(
|
69
|
+
def self.process_mhtml
|
70
|
+
scanner = ::StringScanner.new(mht = ARGF.read)
|
71
|
+
fail scanner.peek(400).inspect unless scanner.scan(/\AFrom: <Saved by Blink>\r
|
72
72
|
Snapshot-Content-Location: \S+\r
|
73
|
-
Subject:(?: \S
|
73
|
+
Subject:(?: \S.*\r\n)+Date: [A-Z][a-z][a-z], \d\d? [A-Z][a-z][a-z] 20\d\d \d\d:\d\d:\d\d -0000\r
|
74
74
|
MIME-Version: 1\.0\r
|
75
75
|
Content-Type: multipart\/related;\r
|
76
76
|
\ttype="text\/html";\r
|
@@ -88,19 +88,19 @@ Content-Location: chrome-error:\/\/chromewebdata\/\r\n\r\n/,
|
|
88
88
|
/\A\r\nContent-Type: text\/html\r
|
89
89
|
Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
|
90
90
|
Content-Transfer-Encoding: quoted-printable\r\n\r\n/
|
91
|
-
puts "trash #{$'.size}"
|
91
|
+
STDERR.puts "trash #{$'.size}"
|
92
92
|
reps.push [prev-delimeter.size-2, scanner.pos-delimeter.size-4, "", ""]
|
93
93
|
when /\A\r\nContent-Type: text\/html\r
|
94
94
|
Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
|
95
95
|
Content-Transfer-Encoding: quoted-printable\r
|
96
96
|
Content-Location: \S+\r\n\r\n/
|
97
|
-
puts "html #{$'.size}"
|
97
|
+
STDERR.puts "html #{$'.size}"
|
98
98
|
header = $&
|
99
99
|
t = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
|
100
|
-
puts "unpacked #{t.size}"
|
100
|
+
STDERR.puts "unpacked #{t.size}"
|
101
101
|
html = ::Nokogiri::HTML t#.force_encoding "utf-8"
|
102
102
|
|
103
|
-
puts ".to_s.size #{html.to_s.size}"
|
103
|
+
STDERR.puts ".to_s.size #{html.to_s.size}"
|
104
104
|
|
105
105
|
html.xpath("//*[not(*)]").group_by(&:name).
|
106
106
|
map{ |_, g| [_, g.map(&:to_s).map(&:size).reduce(:+)] }.
|
@@ -108,46 +108,48 @@ Content-Location: \S+\r\n\r\n/
|
|
108
108
|
|
109
109
|
if block_given?
|
110
110
|
yield html
|
111
|
-
puts "yielded"
|
112
|
-
puts "yield #{html.to_s.size}"
|
111
|
+
STDERR.puts "yielded"
|
112
|
+
STDERR.puts "yield #{html.to_s.size}"
|
113
113
|
end
|
114
114
|
|
115
115
|
reps.push [prev, scanner.pos-delimeter.size-4, header, html.to_s, true, :html]
|
116
116
|
when /\A\r\nContent-Type: text\/css\r
|
117
117
|
Content-Transfer-Encoding: quoted-printable\r
|
118
118
|
Content-Location: \S+\r\n\r\n/
|
119
|
-
puts "css > #{$'.size}"
|
119
|
+
STDERR.puts "css > #{$'.size}"
|
120
120
|
header = $&
|
121
121
|
css = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
|
122
122
|
css.gsub!(/[\r\n]+/, "\n")
|
123
123
|
|
124
|
-
puts "css < #{css.size}"
|
124
|
+
STDERR.puts "css < #{css.size}"
|
125
125
|
reps.push [prev, scanner.pos-delimeter.size-4, header, css, true, :css]
|
126
126
|
|
127
127
|
when /\A\r\nContent-Type: image\/(webp|png|gif|jpeg)\r
|
128
128
|
Content-Transfer-Encoding: base64\r
|
129
129
|
Content-Location: \S+\r\n\r\n/
|
130
|
-
puts "#{$1} #{$'.size}"
|
130
|
+
STDERR.puts "#{$1} #{$'.size}"
|
131
131
|
when /\A\r\nContent-Type: image\/svg\+xml\r
|
132
132
|
Content-Transfer-Encoding: quoted-printable\r
|
133
133
|
Content-Location: \S+\r\n\r\n/
|
134
|
-
puts "svg #{$'.size}"
|
134
|
+
STDERR.puts "svg #{$'.size}"
|
135
135
|
else
|
136
|
-
puts doc[0..300]
|
136
|
+
STDERR.puts doc[0..300]
|
137
137
|
fail
|
138
138
|
end
|
139
139
|
fail unless scanner.charpos == prev = scanner.pos
|
140
140
|
end
|
141
141
|
|
142
|
-
|
143
|
-
|
142
|
+
is = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :html == type }.compact
|
143
|
+
STDERR.puts is.inspect
|
144
|
+
cs = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :css == type }.compact
|
145
|
+
STDERR.puts cs.inspect
|
144
146
|
cs.each_cons(2){ |i,j| fail unless i+1==j }
|
145
147
|
fail unless is == [cs[0]-1]
|
146
148
|
File.write "temp.htm", reps[is[0]][3]
|
147
149
|
puts "css > #{File.size "temp.css"}"
|
148
150
|
File.open("temp.css", "w"){ |f| cs.each{ |i| f.puts reps[i][3] } }
|
149
151
|
system "uncss temp.htm -s temp.css -o out.css"
|
150
|
-
puts "css < #{File.size "out.css"}"
|
152
|
+
STDERR.puts "css < #{File.size "out.css"}"
|
151
153
|
reps[cs[0]][1] = reps[cs[-1]][1]
|
152
154
|
reps[cs[0]+1..cs[-1]] = []
|
153
155
|
reps[cs[0]][3] = File.read "out.css"
|
@@ -158,11 +160,11 @@ Content-Location: \S+\r\n\r\n/
|
|
158
160
|
b.gsub(/[\x80-\xFF]/n){ |_| "=%02X" % _.ord }.
|
159
161
|
gsub(/.{73}[^=][^=](?=.)/, "\\0=\r\n") :
|
160
162
|
header + str.gsub("\n", "\r\n")
|
161
|
-
|
163
|
+
STDERR.puts [str.size, "to - from = #{to - from}"].inspect
|
162
164
|
mht[from...to] = str
|
163
165
|
end
|
164
|
-
|
165
|
-
puts "OK"
|
166
|
+
puts mht
|
167
|
+
STDERR.puts "OK"
|
166
168
|
end
|
167
169
|
|
168
170
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ferrum_common
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Victor Maslov aka Nakilon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-09-
|
11
|
+
date: 2023-09-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ferrum
|