ferrum_common 0.0.0 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3e8ef82a6987019ce9a01dffedf29704905fc53d7f3534cc1654b6a9a7c9220d
4
- data.tar.gz: b9d2a38f62cc43372635cfcf679f95868c735b5ad8fbf2d482ac9b640df2e06a
3
+ metadata.gz: e128a92c5b106f583d12cf8cc772b95637272dae11711e1095916e8485ccd9c5
4
+ data.tar.gz: c52cb59ea98be841986e6930e6a21456636491684cbe2d812f474dff996bb105
5
5
  SHA512:
6
- metadata.gz: 4a7044da6e7ec7c03d56d5b1b2e99f72988476cd93323af8fc9601db4fe9161b534dd8188f9ea57548676776ab05394f5d5c60d7a43fed6bc26ecbd21a93eb2e
7
- data.tar.gz: 474977a1cb5ec61a2f1ac62a47d275e34e803754c3554a8f5393c7634e0fe23abbf02fae4c7b1b9d9f48763266868fd00ec8e3df139a0ef3297dbc1a0bbeaaba
6
+ metadata.gz: aee22d614c6b08f217fcf0786083ef0b7a4aa0c154f79948916fe3433796c1255168df736ae6fdac82e3e0fd4d91ce309aaddadd0fc80e93087856e4cc355789
7
+ data.tar.gz: 6a2939158a8d3dc9da7938d63cad598fb005c88ef0307418ea9dd8a6f5eb3a8af34a1ad5c59257e23d7a99150a7c627520f9d26e1abe9c69ca4c3a0292f52546
data/ferrum_common.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "ferrum_common"
3
- spec.version = "0.0.0"
3
+ spec.version = "0.2.0"
4
4
  spec.summary = "[WIP] common useful extensions for ferrum or cuprite"
5
5
 
6
6
  spec.author = "Victor Maslov aka Nakilon"
@@ -9,7 +9,9 @@ Gem::Specification.new do |spec|
9
9
  spec.metadata = {"source_code_uri" => "https://github.com/nakilon/ferrum_common"}
10
10
 
11
11
  spec.add_dependency "ferrum"
12
- spec.required_ruby_version = ">=2.5"
12
+ spec.add_dependency "browser_reposition"
13
+ spec.add_dependency "nokogiri"
14
+ spec.required_ruby_version = ">=2.5" # why?
13
15
 
14
16
  spec.files = %w{ LICENSE ferrum_common.gemspec lib/ferrum_common.rb }
15
17
  end
data/lib/ferrum_common.rb CHANGED
@@ -1,28 +1,168 @@
1
1
  require "ferrum"
2
2
  module FerrumCommon
3
- def self.new **_
4
- Ferrum::Browser.new(**_).tap do |browser|
5
- require "browser_reposition"
6
- browser.extend(BrowserReposition).reposition
7
- browser.define_singleton_method :redo_until_true do |timeout, msg = nil, &block|
8
- Timeout.timeout timeout do
9
- begin
10
- block.call
11
- rescue Ferrum::NodeNotFoundError
12
- redo
13
- end or (sleep timeout*0.1; redo)
3
+
4
+ module Common
5
+
6
+ private def mhtml browser, timeout, mtd, msg = nil
7
+ Timeout.timeout(timeout){ yield }
8
+ rescue Timeout::Error
9
+ browser.mhtml path: "temp.mhtml"
10
+ STDERR.puts "dumped to ./temp.mhtml"
11
+ $!.backtrace.reject!{ |_| _[/\/gems\/concurrent-ruby-/] }
12
+ $!.backtrace.reject!{ |_| _[/\/gems\/ferrum-/] }
13
+ raise Timeout::Error, "#{$!.to_s} after #{timeout} sec in #{mtd}#{" (#{msg.respond_to?(:call) ? msg.call : msg})" if msg}"
14
+ end
15
+
16
+ def until_true timeout, msg = nil
17
+ mhtml self, timeout, __method__, msg do
18
+ begin
19
+ yield
20
+ rescue Ferrum::NodeNotFoundError
21
+ redo
22
+ end or (sleep timeout*0.1; redo)
23
+ end
24
+ end
25
+
26
+ def until_one type, selector, timeout
27
+ t = nil
28
+ mhtml self, timeout, __method__, ->{ "expected exactly one node for #{type} #{selector.inspect}, got #{t ? t.size : "none"}" } do
29
+ t = begin
30
+ public_method(type).call selector
31
+ end
32
+ unless 1 == t.size
33
+ sleep timeout * 0.1
34
+ redo
14
35
  end
15
- rescue Timeout::Error
16
- browser.mhtml path: "temp.mhtml"
17
- $!.backtrace.reject!{ |_| _[/\/gems\/concurrent-ruby-/] }
18
- $!.backtrace.reject!{ |_| _[/\/gems\/ferrum-/] }
19
- raise Timeout::Error, "#{$!.to_s} in redo_until_true #{" (#{msg})" if msg}"
20
36
  end
21
- browser.define_singleton_method :abort do |msg|
22
- Browser.mhtml path: "temp.mhtml"
23
- puts Thread.current.backtrace
24
- abort msg
37
+ t.first
38
+ end
39
+
40
+ def abort msg_or_cause
41
+ # puts (msg_or_cause.respond_to?(:backtrace) ? msg_or_cause : Thread.current).backtrace
42
+ puts (msg_or_cause.respond_to?(:full_message) ? msg_or_cause.full_message : Thread.current.backtrace)
43
+ mhtml path: "temp.mhtml"
44
+ STDERR.puts "dumped to ./temp.mhtml"
45
+ Kernel.abort msg_or_cause.to_s
46
+ end
47
+
48
+ end
49
+ Ferrum::Page.include Common
50
+ Ferrum::Frame.include Common
51
+
52
+ if "darwin" == Gem::Platform.local.os
53
+ require "browser_reposition"
54
+ Ferrum::Browser.include Common, BrowserReposition
55
+ def self.new **_
56
+ Ferrum::Browser.new(**_).tap(&:reposition)
57
+ end
58
+ else
59
+ Ferrum::Browser.include Common
60
+ def self.new **_
61
+ Ferrum::Browser.new **_
62
+ end
63
+ end
64
+
65
+ # https://datatracker.ietf.org/doc/html/rfc2557
66
+ # https://en.wikipedia.org/wiki/Quoted-printable
67
+ # require "strscan"
68
+ require "nokogiri" # Oga crashes on vk charset
69
+ def self.process_mhtml mht
70
+ scanner = ::StringScanner.new mht
71
+ fail scanner.peek(100).inspect unless scanner.scan(/\AFrom: <Saved by Blink>\r
72
+ Snapshot-Content-Location: \S+\r
73
+ Subject:(?: \S+\r\n)+Date: [A-Z][a-z][a-z], \d\d? [A-Z][a-z][a-z] 20\d\d \d\d:\d\d:\d\d -0000\r
74
+ MIME-Version: 1\.0\r
75
+ Content-Type: multipart\/related;\r
76
+ \ttype="text\/html";\r
77
+ \tboundary="(----MultipartBoundary--[a-zA-Z0-9]{42}----)"\r\n\r\n\r\n--\1/)
78
+ delimeter = scanner[1]
79
+ fail unless scanner.charpos == prev = scanner.pos
80
+ reps = []
81
+ while s = scanner.search_full(::Regexp.new(delimeter), true, true)
82
+ doc = s[0...-delimeter.size-4]
83
+ case doc
84
+ when /\A\r\nContent-Type: text\/html\r
85
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
86
+ Content-Transfer-Encoding: quoted-printable\r
87
+ Content-Location: chrome-error:\/\/chromewebdata\/\r\n\r\n/,
88
+ /\A\r\nContent-Type: text\/html\r
89
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
90
+ Content-Transfer-Encoding: quoted-printable\r\n\r\n/
91
+ puts "trash #{$'.size}"
92
+ reps.push [prev-delimeter.size-2, scanner.pos-delimeter.size-4, "", ""]
93
+ when /\A\r\nContent-Type: text\/html\r
94
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
95
+ Content-Transfer-Encoding: quoted-printable\r
96
+ Content-Location: \S+\r\n\r\n/
97
+ puts "html #{$'.size}"
98
+ header = $&
99
+ t = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
100
+ puts "unpacked #{t.size}"
101
+ html = ::Nokogiri::HTML t#.force_encoding "utf-8"
102
+
103
+ puts ".to_s.size #{html.to_s.size}"
104
+
105
+ html.xpath("//*[not(*)]").group_by(&:name).
106
+ map{ |_, g| [_, g.map(&:to_s).map(&:size).reduce(:+)] }.
107
+ sort_by(&:last).reverse.take(5).each &method(:p)
108
+
109
+ if block_given?
110
+ yield html
111
+ puts "yielded"
112
+ puts "yield #{html.to_s.size}"
113
+ end
114
+
115
+ reps.push [prev, scanner.pos-delimeter.size-4, header, html.to_s, true, :html]
116
+ when /\A\r\nContent-Type: text\/css\r
117
+ Content-Transfer-Encoding: quoted-printable\r
118
+ Content-Location: \S+\r\n\r\n/
119
+ puts "css > #{$'.size}"
120
+ header = $&
121
+ css = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
122
+ css.gsub!(/[\r\n]+/, "\n")
123
+
124
+ puts "css < #{css.size}"
125
+ reps.push [prev, scanner.pos-delimeter.size-4, header, css, true, :css]
126
+
127
+ when /\A\r\nContent-Type: image\/(webp|png|gif|jpeg)\r
128
+ Content-Transfer-Encoding: base64\r
129
+ Content-Location: \S+\r\n\r\n/
130
+ puts "#{$1} #{$'.size}"
131
+ when /\A\r\nContent-Type: image\/svg\+xml\r
132
+ Content-Transfer-Encoding: quoted-printable\r
133
+ Content-Location: \S+\r\n\r\n/
134
+ puts "svg #{$'.size}"
135
+ else
136
+ puts doc[0..300]
137
+ fail
25
138
  end
139
+ fail unless scanner.charpos == prev = scanner.pos
140
+ end
141
+
142
+ p is = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :html == type }.compact
143
+ p cs = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :css == type }.compact
144
+ cs.each_cons(2){ |i,j| fail unless i+1==j }
145
+ fail unless is == [cs[0]-1]
146
+ File.write "temp.htm", reps[is[0]][3]
147
+ puts "css > #{File.size "temp.css"}"
148
+ File.open("temp.css", "w"){ |f| cs.each{ |i| f.puts reps[i][3] } }
149
+ system "uncss temp.htm -s temp.css -o out.css"
150
+ puts "css < #{File.size "out.css"}"
151
+ reps[cs[0]][1] = reps[cs[-1]][1]
152
+ reps[cs[0]+1..cs[-1]] = []
153
+ reps[cs[0]][3] = File.read "out.css"
154
+
155
+ reps.reverse_each do |from, to, header, str, qp|
156
+ str = qp ?
157
+ header + str.gsub("=", "=3D").
158
+ b.gsub(/[\x80-\xFF]/n){ |_| "=%02X" % _.ord }.
159
+ gsub(/.{73}[^=][^=](?=.)/, "\\0=\r\n") :
160
+ header + str.gsub("\n", "\r\n")
161
+ p [str.size, "to - from = #{to - from}"]
162
+ mht[from...to] = str
26
163
  end
164
+ p ::File.write "temp.mht", mht
165
+ puts "OK"
27
166
  end
167
+
28
168
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ferrum_common
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Maslov aka Nakilon
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-07 00:00:00.000000000 Z
11
+ date: 2023-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ferrum
@@ -24,6 +24,34 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: browser_reposition
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
27
55
  description:
28
56
  email: nakilon@gmail.com
29
57
  executables: []