ferrum_common 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 337f36c693834482dea4e00fcb2d5407b2787df08279bdba679b5e22b3482745
4
- data.tar.gz: 01bceb7e7fa64fa3082040ecc11b7c83674ace1a2282153132172cc3e6c12a9f
3
+ metadata.gz: e128a92c5b106f583d12cf8cc772b95637272dae11711e1095916e8485ccd9c5
4
+ data.tar.gz: c52cb59ea98be841986e6930e6a21456636491684cbe2d812f474dff996bb105
5
5
  SHA512:
6
- metadata.gz: 56f406f33b726a82f4aa2f7e8b1c4e39b53ddc14dedea932f550bb1e4d7586d09d835915fd2a219327c7fdfeae34e245ae4721b9f06eb66a2cbec1b743b1f977
7
- data.tar.gz: afbcaa65ed2a25940d44290e3e82f4269eb5b74c6cdc80f6d402da7e7d87ff14fd3f704243d7e080f51c9014e74abba580ecc11df39a4c5a6838e9cf98bf5dd2
6
+ metadata.gz: aee22d614c6b08f217fcf0786083ef0b7a4aa0c154f79948916fe3433796c1255168df736ae6fdac82e3e0fd4d91ce309aaddadd0fc80e93087856e4cc355789
7
+ data.tar.gz: 6a2939158a8d3dc9da7938d63cad598fb005c88ef0307418ea9dd8a6f5eb3a8af34a1ad5c59257e23d7a99150a7c627520f9d26e1abe9c69ca4c3a0292f52546
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "ferrum_common"
3
- spec.version = "0.1.0"
3
+ spec.version = "0.2.0"
4
4
  spec.summary = "[WIP] common useful extensions for ferrum or cuprite"
5
5
 
6
6
  spec.author = "Victor Maslov aka Nakilon"
@@ -10,6 +10,7 @@ Gem::Specification.new do |spec|
10
10
 
11
11
  spec.add_dependency "ferrum"
12
12
  spec.add_dependency "browser_reposition"
13
+ spec.add_dependency "nokogiri"
13
14
  spec.required_ruby_version = ">=2.5" # why?
14
15
 
15
16
  spec.files = %w{ LICENSE ferrum_common.gemspec lib/ferrum_common.rb }
data/lib/ferrum_common.rb CHANGED
@@ -3,17 +3,18 @@ module FerrumCommon
3
3
 
4
4
  module Common
5
5
 
6
- def self.mhtml browser, timeout, mtd, msg = nil
6
+ private def mhtml browser, timeout, mtd, msg = nil
7
7
  Timeout.timeout(timeout){ yield }
8
8
  rescue Timeout::Error
9
9
  browser.mhtml path: "temp.mhtml"
10
+ STDERR.puts "dumped to ./temp.mhtml"
10
11
  $!.backtrace.reject!{ |_| _[/\/gems\/concurrent-ruby-/] }
11
12
  $!.backtrace.reject!{ |_| _[/\/gems\/ferrum-/] }
12
13
  raise Timeout::Error, "#{$!.to_s} after #{timeout} sec in #{mtd}#{" (#{msg.respond_to?(:call) ? msg.call : msg})" if msg}"
13
14
  end
14
15
 
15
16
  def until_true timeout, msg = nil
16
- Module.nesting.first.mhtml self, timeout, __method__, msg do
17
+ mhtml self, timeout, __method__, msg do
17
18
  begin
18
19
  yield
19
20
  rescue Ferrum::NodeNotFoundError
@@ -24,12 +25,9 @@ module FerrumCommon
24
25
 
25
26
  def until_one type, selector, timeout
26
27
  t = nil
27
- Module.nesting.first.mhtml self, timeout, __method__, ->{ "expected exactly one node for #{type} #{selector.inspect}, got #{t ? t.size : "none"}" } do
28
+ mhtml self, timeout, __method__, ->{ "expected exactly one node for #{type} #{selector.inspect}, got #{t ? t.size : "none"}" } do
28
29
  t = begin
29
30
  public_method(type).call selector
30
- rescue Ferrum::NodeNotFoundError
31
- sleep timeout * 0.1
32
- redo
33
31
  end
34
32
  unless 1 == t.size
35
33
  sleep timeout * 0.1
@@ -43,7 +41,7 @@ module FerrumCommon
43
41
  # puts (msg_or_cause.respond_to?(:backtrace) ? msg_or_cause : Thread.current).backtrace
44
42
  puts (msg_or_cause.respond_to?(:full_message) ? msg_or_cause.full_message : Thread.current.backtrace)
45
43
  mhtml path: "temp.mhtml"
46
- puts "dumped to ./temp.mhtml"
44
+ STDERR.puts "dumped to ./temp.mhtml"
47
45
  Kernel.abort msg_or_cause.to_s
48
46
  end
49
47
 
@@ -51,10 +49,120 @@ module FerrumCommon
51
49
  Ferrum::Page.include Common
52
50
  Ferrum::Frame.include Common
53
51
 
54
- require "browser_reposition"
55
- Ferrum::Browser.include Common, BrowserReposition
56
- def self.new **_
57
- Ferrum::Browser.new(**_).tap(&:reposition)
52
+ if "darwin" == Gem::Platform.local.os
53
+ require "browser_reposition"
54
+ Ferrum::Browser.include Common, BrowserReposition
55
+ def self.new **_
56
+ Ferrum::Browser.new(**_).tap(&:reposition)
57
+ end
58
+ else
59
+ Ferrum::Browser.include Common
60
+ def self.new **_
61
+ Ferrum::Browser.new **_
62
+ end
63
+ end
64
+
65
+ # https://datatracker.ietf.org/doc/html/rfc2557
66
+ # https://en.wikipedia.org/wiki/Quoted-printable
67
+ # require "strscan"
68
+ require "nokogiri" # Oga crashes on vk charset
69
+ def self.process_mhtml mht
70
+ scanner = ::StringScanner.new mht
71
+ fail scanner.peek(100).inspect unless scanner.scan(/\AFrom: <Saved by Blink>\r
72
+ Snapshot-Content-Location: \S+\r
73
+ Subject:(?: \S+\r\n)+Date: [A-Z][a-z][a-z], \d\d? [A-Z][a-z][a-z] 20\d\d \d\d:\d\d:\d\d -0000\r
74
+ MIME-Version: 1\.0\r
75
+ Content-Type: multipart\/related;\r
76
+ \ttype="text\/html";\r
77
+ \tboundary="(----MultipartBoundary--[a-zA-Z0-9]{42}----)"\r\n\r\n\r\n--\1/)
78
+ delimeter = scanner[1]
79
+ fail unless scanner.charpos == prev = scanner.pos
80
+ reps = []
81
+ while s = scanner.search_full(::Regexp.new(delimeter), true, true)
82
+ doc = s[0...-delimeter.size-4]
83
+ case doc
84
+ when /\A\r\nContent-Type: text\/html\r
85
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
86
+ Content-Transfer-Encoding: quoted-printable\r
87
+ Content-Location: chrome-error:\/\/chromewebdata\/\r\n\r\n/,
88
+ /\A\r\nContent-Type: text\/html\r
89
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
90
+ Content-Transfer-Encoding: quoted-printable\r\n\r\n/
91
+ puts "trash #{$'.size}"
92
+ reps.push [prev-delimeter.size-2, scanner.pos-delimeter.size-4, "", ""]
93
+ when /\A\r\nContent-Type: text\/html\r
94
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
95
+ Content-Transfer-Encoding: quoted-printable\r
96
+ Content-Location: \S+\r\n\r\n/
97
+ puts "html #{$'.size}"
98
+ header = $&
99
+ t = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
100
+ puts "unpacked #{t.size}"
101
+ html = ::Nokogiri::HTML t#.force_encoding "utf-8"
102
+
103
+ puts ".to_s.size #{html.to_s.size}"
104
+
105
+ html.xpath("//*[not(*)]").group_by(&:name).
106
+ map{ |_, g| [_, g.map(&:to_s).map(&:size).reduce(:+)] }.
107
+ sort_by(&:last).reverse.take(5).each &method(:p)
108
+
109
+ if block_given?
110
+ yield html
111
+ puts "yielded"
112
+ puts "yield #{html.to_s.size}"
113
+ end
114
+
115
+ reps.push [prev, scanner.pos-delimeter.size-4, header, html.to_s, true, :html]
116
+ when /\A\r\nContent-Type: text\/css\r
117
+ Content-Transfer-Encoding: quoted-printable\r
118
+ Content-Location: \S+\r\n\r\n/
119
+ puts "css > #{$'.size}"
120
+ header = $&
121
+ css = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
122
+ css.gsub!(/[\r\n]+/, "\n")
123
+
124
+ puts "css < #{css.size}"
125
+ reps.push [prev, scanner.pos-delimeter.size-4, header, css, true, :css]
126
+
127
+ when /\A\r\nContent-Type: image\/(webp|png|gif|jpeg)\r
128
+ Content-Transfer-Encoding: base64\r
129
+ Content-Location: \S+\r\n\r\n/
130
+ puts "#{$1} #{$'.size}"
131
+ when /\A\r\nContent-Type: image\/svg\+xml\r
132
+ Content-Transfer-Encoding: quoted-printable\r
133
+ Content-Location: \S+\r\n\r\n/
134
+ puts "svg #{$'.size}"
135
+ else
136
+ puts doc[0..300]
137
+ fail
138
+ end
139
+ fail unless scanner.charpos == prev = scanner.pos
140
+ end
141
+
142
+ p is = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :html == type }.compact
143
+ p cs = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :css == type }.compact
144
+ cs.each_cons(2){ |i,j| fail unless i+1==j }
145
+ fail unless is == [cs[0]-1]
146
+ File.write "temp.htm", reps[is[0]][3]
147
+ puts "css > #{File.size "temp.css"}"
148
+ File.open("temp.css", "w"){ |f| cs.each{ |i| f.puts reps[i][3] } }
149
+ system "uncss temp.htm -s temp.css -o out.css"
150
+ puts "css < #{File.size "out.css"}"
151
+ reps[cs[0]][1] = reps[cs[-1]][1]
152
+ reps[cs[0]+1..cs[-1]] = []
153
+ reps[cs[0]][3] = File.read "out.css"
154
+
155
+ reps.reverse_each do |from, to, header, str, qp|
156
+ str = qp ?
157
+ header + str.gsub("=", "=3D").
158
+ b.gsub(/[\x80-\xFF]/n){ |_| "=%02X" % _.ord }.
159
+ gsub(/.{73}[^=][^=](?=.)/, "\\0=\r\n") :
160
+ header + str.gsub("\n", "\r\n")
161
+ p [str.size, "to - from = #{to - from}"]
162
+ mht[from...to] = str
163
+ end
164
+ p ::File.write "temp.mht", mht
165
+ puts "OK"
58
166
  end
59
167
 
60
168
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ferrum_common
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Maslov aka Nakilon
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-09 00:00:00.000000000 Z
11
+ date: 2023-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ferrum
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  description:
42
56
  email: nakilon@gmail.com
43
57
  executables: []