ferrum_common 0.1.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 337f36c693834482dea4e00fcb2d5407b2787df08279bdba679b5e22b3482745
4
- data.tar.gz: 01bceb7e7fa64fa3082040ecc11b7c83674ace1a2282153132172cc3e6c12a9f
3
+ metadata.gz: 3f66888a248e7235fdccb3510c7370d708c2052868d828c4553c41b6b0d3411e
4
+ data.tar.gz: e57b7c549817f6294703ee108d2b54d400d34735fc8cde5a8f63f27546aa9562
5
5
  SHA512:
6
- metadata.gz: 56f406f33b726a82f4aa2f7e8b1c4e39b53ddc14dedea932f550bb1e4d7586d09d835915fd2a219327c7fdfeae34e245ae4721b9f06eb66a2cbec1b743b1f977
7
- data.tar.gz: afbcaa65ed2a25940d44290e3e82f4269eb5b74c6cdc80f6d402da7e7d87ff14fd3f704243d7e080f51c9014e74abba580ecc11df39a4c5a6838e9cf98bf5dd2
6
+ metadata.gz: b87cac7f5c32144f8955c436f37d1b98b4562a771fd12607bbfee6524c983288b0eba127474fa12ded126bb5abf0673740eda70dc2e0144a7fb8c2c0f24b04f4
7
+ data.tar.gz: 7731364485b2bf8be92c6d4e4aaa4cbd9465d1cc9386cd0cbfa9d5e24f26f33065f3c68ebbcb55cea6c30fce320b50a719a62b3cbddfce69282952f39ebad6e7
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "ferrum_common"
3
- spec.version = "0.1.0"
3
+ spec.version = "0.3.0"
4
4
  spec.summary = "[WIP] common useful extensions for ferrum or cuprite"
5
5
 
6
6
  spec.author = "Victor Maslov aka Nakilon"
@@ -10,6 +10,7 @@ Gem::Specification.new do |spec|
10
10
 
11
11
  spec.add_dependency "ferrum"
12
12
  spec.add_dependency "browser_reposition"
13
+ spec.add_dependency "nokogiri"
13
14
  spec.required_ruby_version = ">=2.5" # why?
14
15
 
15
16
  spec.files = %w{ LICENSE ferrum_common.gemspec lib/ferrum_common.rb }
data/lib/ferrum_common.rb CHANGED
@@ -3,17 +3,18 @@ module FerrumCommon
3
3
 
4
4
  module Common
5
5
 
6
- def self.mhtml browser, timeout, mtd, msg = nil
6
+ private def yield_with_timeout browser, timeout, mtd, msg = nil
7
7
  Timeout.timeout(timeout){ yield }
8
8
  rescue Timeout::Error
9
9
  browser.mhtml path: "temp.mhtml"
10
+ STDERR.puts "dumped to ./temp.mhtml"
10
11
  $!.backtrace.reject!{ |_| _[/\/gems\/concurrent-ruby-/] }
11
12
  $!.backtrace.reject!{ |_| _[/\/gems\/ferrum-/] }
12
13
  raise Timeout::Error, "#{$!.to_s} after #{timeout} sec in #{mtd}#{" (#{msg.respond_to?(:call) ? msg.call : msg})" if msg}"
13
14
  end
14
15
 
15
16
  def until_true timeout, msg = nil
16
- Module.nesting.first.mhtml self, timeout, __method__, msg do
17
+ yield_with_timeout self, timeout, __method__, msg do
17
18
  begin
18
19
  yield
19
20
  rescue Ferrum::NodeNotFoundError
@@ -24,12 +25,9 @@ module FerrumCommon
24
25
 
25
26
  def until_one type, selector, timeout
26
27
  t = nil
27
- Module.nesting.first.mhtml self, timeout, __method__, ->{ "expected exactly one node for #{type} #{selector.inspect}, got #{t ? t.size : "none"}" } do
28
+ yield_with_timeout self, timeout, __method__, ->{ "expected exactly one node for #{type} #{selector.inspect}, got #{t ? t.size : "none"}" } do
28
29
  t = begin
29
30
  public_method(type).call selector
30
- rescue Ferrum::NodeNotFoundError
31
- sleep timeout * 0.1
32
- redo
33
31
  end
34
32
  unless 1 == t.size
35
33
  sleep timeout * 0.1
@@ -43,7 +41,7 @@ module FerrumCommon
43
41
  # puts (msg_or_cause.respond_to?(:backtrace) ? msg_or_cause : Thread.current).backtrace
44
42
  puts (msg_or_cause.respond_to?(:full_message) ? msg_or_cause.full_message : Thread.current.backtrace)
45
43
  mhtml path: "temp.mhtml"
46
- puts "dumped to ./temp.mhtml"
44
+ STDERR.puts "dumped to ./temp.mhtml"
47
45
  Kernel.abort msg_or_cause.to_s
48
46
  end
49
47
 
@@ -51,10 +49,122 @@ module FerrumCommon
51
49
  Ferrum::Page.include Common
52
50
  Ferrum::Frame.include Common
53
51
 
54
- require "browser_reposition"
55
- Ferrum::Browser.include Common, BrowserReposition
56
- def self.new **_
57
- Ferrum::Browser.new(**_).tap(&:reposition)
52
+ if "darwin" == Gem::Platform.local.os
53
+ require "browser_reposition"
54
+ Ferrum::Browser.include Common, BrowserReposition
55
+ def self.new **_
56
+ Ferrum::Browser.new(**_).tap(&:reposition)
57
+ end
58
+ else
59
+ Ferrum::Browser.include Common
60
+ def self.new **_
61
+ Ferrum::Browser.new **_
62
+ end
63
+ end
64
+
65
+ # https://datatracker.ietf.org/doc/html/rfc2557
66
+ # https://en.wikipedia.org/wiki/Quoted-printable
67
+ # require "strscan"
68
+ require "nokogiri" # Oga crashes on vk charset
69
+ def self.process_mhtml
70
+ scanner = ::StringScanner.new(mht = ARGF.read)
71
+ fail scanner.peek(400).inspect unless scanner.scan(/\AFrom: <Saved by Blink>\r
72
+ Snapshot-Content-Location: \S+\r
73
+ Subject:(?: \S.*\r\n)+Date: [A-Z][a-z][a-z], \d\d? [A-Z][a-z][a-z] 20\d\d \d\d:\d\d:\d\d -0000\r
74
+ MIME-Version: 1\.0\r
75
+ Content-Type: multipart\/related;\r
76
+ \ttype="text\/html";\r
77
+ \tboundary="(----MultipartBoundary--[a-zA-Z0-9]{42}----)"\r\n\r\n\r\n--\1/)
78
+ delimeter = scanner[1]
79
+ fail unless scanner.charpos == prev = scanner.pos
80
+ reps = []
81
+ while s = scanner.search_full(::Regexp.new(delimeter), true, true)
82
+ doc = s[0...-delimeter.size-4]
83
+ case doc
84
+ when /\A\r\nContent-Type: text\/html\r
85
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
86
+ Content-Transfer-Encoding: quoted-printable\r
87
+ Content-Location: chrome-error:\/\/chromewebdata\/\r\n\r\n/,
88
+ /\A\r\nContent-Type: text\/html\r
89
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
90
+ Content-Transfer-Encoding: quoted-printable\r\n\r\n/
91
+ STDERR.puts "trash #{$'.size}"
92
+ reps.push [prev-delimeter.size-2, scanner.pos-delimeter.size-4, "", ""]
93
+ when /\A\r\nContent-Type: text\/html\r
94
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
95
+ Content-Transfer-Encoding: quoted-printable\r
96
+ Content-Location: \S+\r\n\r\n/
97
+ STDERR.puts "html #{$'.size}"
98
+ header = $&
99
+ t = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
100
+ STDERR.puts "unpacked #{t.size}"
101
+ html = ::Nokogiri::HTML t#.force_encoding "utf-8"
102
+
103
+ STDERR.puts ".to_s.size #{html.to_s.size}"
104
+
105
+ html.xpath("//*[not(*)]").group_by(&:name).
106
+ map{ |_, g| [_, g.map(&:to_s).map(&:size).reduce(:+)] }.
107
+ sort_by(&:last).reverse.take(5).each &method(:p)
108
+
109
+ if block_given?
110
+ yield html
111
+ STDERR.puts "yielded"
112
+ STDERR.puts "yield #{html.to_s.size}"
113
+ end
114
+
115
+ reps.push [prev, scanner.pos-delimeter.size-4, header, html.to_s, true, :html]
116
+ when /\A\r\nContent-Type: text\/css\r
117
+ Content-Transfer-Encoding: quoted-printable\r
118
+ Content-Location: \S+\r\n\r\n/
119
+ STDERR.puts "css > #{$'.size}"
120
+ header = $&
121
+ css = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
122
+ css.gsub!(/[\r\n]+/, "\n")
123
+
124
+ STDERR.puts "css < #{css.size}"
125
+ reps.push [prev, scanner.pos-delimeter.size-4, header, css, true, :css]
126
+
127
+ when /\A\r\nContent-Type: image\/(webp|png|gif|jpeg)\r
128
+ Content-Transfer-Encoding: base64\r
129
+ Content-Location: \S+\r\n\r\n/
130
+ STDERR.puts "#{$1} #{$'.size}"
131
+ when /\A\r\nContent-Type: image\/svg\+xml\r
132
+ Content-Transfer-Encoding: quoted-printable\r
133
+ Content-Location: \S+\r\n\r\n/
134
+ STDERR.puts "svg #{$'.size}"
135
+ else
136
+ STDERR.puts doc[0..300]
137
+ fail
138
+ end
139
+ fail unless scanner.charpos == prev = scanner.pos
140
+ end
141
+
142
+ is = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :html == type }.compact
143
+ STDERR.puts is.inspect
144
+ cs = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :css == type }.compact
145
+ STDERR.puts cs.inspect
146
+ cs.each_cons(2){ |i,j| fail unless i+1==j }
147
+ fail unless is == [cs[0]-1]
148
+ File.write "temp.htm", reps[is[0]][3]
149
+ puts "css > #{File.size "temp.css"}"
150
+ File.open("temp.css", "w"){ |f| cs.each{ |i| f.puts reps[i][3] } }
151
+ system "uncss temp.htm -s temp.css -o out.css"
152
+ STDERR.puts "css < #{File.size "out.css"}"
153
+ reps[cs[0]][1] = reps[cs[-1]][1]
154
+ reps[cs[0]+1..cs[-1]] = []
155
+ reps[cs[0]][3] = File.read "out.css"
156
+
157
+ reps.reverse_each do |from, to, header, str, qp|
158
+ str = qp ?
159
+ header + str.gsub("=", "=3D").
160
+ b.gsub(/[\x80-\xFF]/n){ |_| "=%02X" % _.ord }.
161
+ gsub(/.{73}[^=][^=](?=.)/, "\\0=\r\n") :
162
+ header + str.gsub("\n", "\r\n")
163
+ STDERR.puts [str.size, "to - from = #{to - from}"].inspect
164
+ mht[from...to] = str
165
+ end
166
+ puts mht
167
+ STDERR.puts "OK"
58
168
  end
59
169
 
60
170
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ferrum_common
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Maslov aka Nakilon
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-09 00:00:00.000000000 Z
11
+ date: 2023-09-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ferrum
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  description:
42
56
  email: nakilon@gmail.com
43
57
  executables: []