ferrum_common 0.0.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3e8ef82a6987019ce9a01dffedf29704905fc53d7f3534cc1654b6a9a7c9220d
4
- data.tar.gz: b9d2a38f62cc43372635cfcf679f95868c735b5ad8fbf2d482ac9b640df2e06a
3
+ metadata.gz: e128a92c5b106f583d12cf8cc772b95637272dae11711e1095916e8485ccd9c5
4
+ data.tar.gz: c52cb59ea98be841986e6930e6a21456636491684cbe2d812f474dff996bb105
5
5
  SHA512:
6
- metadata.gz: 4a7044da6e7ec7c03d56d5b1b2e99f72988476cd93323af8fc9601db4fe9161b534dd8188f9ea57548676776ab05394f5d5c60d7a43fed6bc26ecbd21a93eb2e
7
- data.tar.gz: 474977a1cb5ec61a2f1ac62a47d275e34e803754c3554a8f5393c7634e0fe23abbf02fae4c7b1b9d9f48763266868fd00ec8e3df139a0ef3297dbc1a0bbeaaba
6
+ metadata.gz: aee22d614c6b08f217fcf0786083ef0b7a4aa0c154f79948916fe3433796c1255168df736ae6fdac82e3e0fd4d91ce309aaddadd0fc80e93087856e4cc355789
7
+ data.tar.gz: 6a2939158a8d3dc9da7938d63cad598fb005c88ef0307418ea9dd8a6f5eb3a8af34a1ad5c59257e23d7a99150a7c627520f9d26e1abe9c69ca4c3a0292f52546
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "ferrum_common"
3
- spec.version = "0.0.0"
3
+ spec.version = "0.2.0"
4
4
  spec.summary = "[WIP] common useful extensions for ferrum or cuprite"
5
5
 
6
6
  spec.author = "Victor Maslov aka Nakilon"
@@ -9,7 +9,9 @@ Gem::Specification.new do |spec|
9
9
  spec.metadata = {"source_code_uri" => "https://github.com/nakilon/ferrum_common"}
10
10
 
11
11
  spec.add_dependency "ferrum"
12
- spec.required_ruby_version = ">=2.5"
12
+ spec.add_dependency "browser_reposition"
13
+ spec.add_dependency "nokogiri"
14
+ spec.required_ruby_version = ">=2.5" # why?
13
15
 
14
16
  spec.files = %w{ LICENSE ferrum_common.gemspec lib/ferrum_common.rb }
15
17
  end
data/lib/ferrum_common.rb CHANGED
@@ -1,28 +1,168 @@
1
1
  require "ferrum"
2
2
  module FerrumCommon
3
- def self.new **_
4
- Ferrum::Browser.new(**_).tap do |browser|
5
- require "browser_reposition"
6
- browser.extend(BrowserReposition).reposition
7
- browser.define_singleton_method :redo_until_true do |timeout, msg = nil, &block|
8
- Timeout.timeout timeout do
9
- begin
10
- block.call
11
- rescue Ferrum::NodeNotFoundError
12
- redo
13
- end or (sleep timeout*0.1; redo)
3
+
4
+ module Common
5
+
6
+ private def mhtml browser, timeout, mtd, msg = nil
7
+ Timeout.timeout(timeout){ yield }
8
+ rescue Timeout::Error
9
+ browser.mhtml path: "temp.mhtml"
10
+ STDERR.puts "dumped to ./temp.mhtml"
11
+ $!.backtrace.reject!{ |_| _[/\/gems\/concurrent-ruby-/] }
12
+ $!.backtrace.reject!{ |_| _[/\/gems\/ferrum-/] }
13
+ raise Timeout::Error, "#{$!.to_s} after #{timeout} sec in #{mtd}#{" (#{msg.respond_to?(:call) ? msg.call : msg})" if msg}"
14
+ end
15
+
16
+ def until_true timeout, msg = nil
17
+ mhtml self, timeout, __method__, msg do
18
+ begin
19
+ yield
20
+ rescue Ferrum::NodeNotFoundError
21
+ redo
22
+ end or (sleep timeout*0.1; redo)
23
+ end
24
+ end
25
+
26
+ def until_one type, selector, timeout
27
+ t = nil
28
+ mhtml self, timeout, __method__, ->{ "expected exactly one node for #{type} #{selector.inspect}, got #{t ? t.size : "none"}" } do
29
+ t = begin
30
+ public_method(type).call selector
31
+ end
32
+ unless 1 == t.size
33
+ sleep timeout * 0.1
34
+ redo
14
35
  end
15
- rescue Timeout::Error
16
- browser.mhtml path: "temp.mhtml"
17
- $!.backtrace.reject!{ |_| _[/\/gems\/concurrent-ruby-/] }
18
- $!.backtrace.reject!{ |_| _[/\/gems\/ferrum-/] }
19
- raise Timeout::Error, "#{$!.to_s} in redo_until_true #{" (#{msg})" if msg}"
20
36
  end
21
- browser.define_singleton_method :abort do |msg|
22
- Browser.mhtml path: "temp.mhtml"
23
- puts Thread.current.backtrace
24
- abort msg
37
+ t.first
38
+ end
39
+
40
+ def abort msg_or_cause
41
+ # puts (msg_or_cause.respond_to?(:backtrace) ? msg_or_cause : Thread.current).backtrace
42
+ puts (msg_or_cause.respond_to?(:full_message) ? msg_or_cause.full_message : Thread.current.backtrace)
43
+ mhtml path: "temp.mhtml"
44
+ STDERR.puts "dumped to ./temp.mhtml"
45
+ Kernel.abort msg_or_cause.to_s
46
+ end
47
+
48
+ end
49
+ Ferrum::Page.include Common
50
+ Ferrum::Frame.include Common
51
+
52
+ if "darwin" == Gem::Platform.local.os
53
+ require "browser_reposition"
54
+ Ferrum::Browser.include Common, BrowserReposition
55
+ def self.new **_
56
+ Ferrum::Browser.new(**_).tap(&:reposition)
57
+ end
58
+ else
59
+ Ferrum::Browser.include Common
60
+ def self.new **_
61
+ Ferrum::Browser.new **_
62
+ end
63
+ end
64
+
65
+ # https://datatracker.ietf.org/doc/html/rfc2557
66
+ # https://en.wikipedia.org/wiki/Quoted-printable
67
+ # require "strscan"
68
+ require "nokogiri" # Oga crashes on vk charset
69
+ def self.process_mhtml mht
70
+ scanner = ::StringScanner.new mht
71
+ fail scanner.peek(100).inspect unless scanner.scan(/\AFrom: <Saved by Blink>\r
72
+ Snapshot-Content-Location: \S+\r
73
+ Subject:(?: \S+\r\n)+Date: [A-Z][a-z][a-z], \d\d? [A-Z][a-z][a-z] 20\d\d \d\d:\d\d:\d\d -0000\r
74
+ MIME-Version: 1\.0\r
75
+ Content-Type: multipart\/related;\r
76
+ \ttype="text\/html";\r
77
+ \tboundary="(----MultipartBoundary--[a-zA-Z0-9]{42}----)"\r\n\r\n\r\n--\1/)
78
+ delimeter = scanner[1]
79
+ fail unless scanner.charpos == prev = scanner.pos
80
+ reps = []
81
+ while s = scanner.search_full(::Regexp.new(delimeter), true, true)
82
+ doc = s[0...-delimeter.size-4]
83
+ case doc
84
+ when /\A\r\nContent-Type: text\/html\r
85
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
86
+ Content-Transfer-Encoding: quoted-printable\r
87
+ Content-Location: chrome-error:\/\/chromewebdata\/\r\n\r\n/,
88
+ /\A\r\nContent-Type: text\/html\r
89
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
90
+ Content-Transfer-Encoding: quoted-printable\r\n\r\n/
91
+ puts "trash #{$'.size}"
92
+ reps.push [prev-delimeter.size-2, scanner.pos-delimeter.size-4, "", ""]
93
+ when /\A\r\nContent-Type: text\/html\r
94
+ Content-ID: <frame-[A-Z0-9]{32}@mhtml\.blink>\r
95
+ Content-Transfer-Encoding: quoted-printable\r
96
+ Content-Location: \S+\r\n\r\n/
97
+ puts "html #{$'.size}"
98
+ header = $&
99
+ t = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
100
+ puts "unpacked #{t.size}"
101
+ html = ::Nokogiri::HTML t#.force_encoding "utf-8"
102
+
103
+ puts ".to_s.size #{html.to_s.size}"
104
+
105
+ html.xpath("//*[not(*)]").group_by(&:name).
106
+ map{ |_, g| [_, g.map(&:to_s).map(&:size).reduce(:+)] }.
107
+ sort_by(&:last).reverse.take(5).each &method(:p)
108
+
109
+ if block_given?
110
+ yield html
111
+ puts "yielded"
112
+ puts "yield #{html.to_s.size}"
113
+ end
114
+
115
+ reps.push [prev, scanner.pos-delimeter.size-4, header, html.to_s, true, :html]
116
+ when /\A\r\nContent-Type: text\/css\r
117
+ Content-Transfer-Encoding: quoted-printable\r
118
+ Content-Location: \S+\r\n\r\n/
119
+ puts "css > #{$'.size}"
120
+ header = $&
121
+ css = $'.gsub(/=([0-9A-F][0-9A-F])/){ fail $1 unless "3D" == $1 || "20" == $1 || "0A" == $1 unless "80" <= $1; $1.hex.chr }.gsub("=\r\n", "")
122
+ css.gsub!(/[\r\n]+/, "\n")
123
+
124
+ puts "css < #{css.size}"
125
+ reps.push [prev, scanner.pos-delimeter.size-4, header, css, true, :css]
126
+
127
+ when /\A\r\nContent-Type: image\/(webp|png|gif|jpeg)\r
128
+ Content-Transfer-Encoding: base64\r
129
+ Content-Location: \S+\r\n\r\n/
130
+ puts "#{$1} #{$'.size}"
131
+ when /\A\r\nContent-Type: image\/svg\+xml\r
132
+ Content-Transfer-Encoding: quoted-printable\r
133
+ Content-Location: \S+\r\n\r\n/
134
+ puts "svg #{$'.size}"
135
+ else
136
+ puts doc[0..300]
137
+ fail
25
138
  end
139
+ fail unless scanner.charpos == prev = scanner.pos
140
+ end
141
+
142
+ p is = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :html == type }.compact
143
+ p cs = reps.map.with_index{ |(_, _, _, _, _, type), i| i if :css == type }.compact
144
+ cs.each_cons(2){ |i,j| fail unless i+1==j }
145
+ fail unless is == [cs[0]-1]
146
+ File.write "temp.htm", reps[is[0]][3]
147
+ puts "css > #{File.size "temp.css"}"
148
+ File.open("temp.css", "w"){ |f| cs.each{ |i| f.puts reps[i][3] } }
149
+ system "uncss temp.htm -s temp.css -o out.css"
150
+ puts "css < #{File.size "out.css"}"
151
+ reps[cs[0]][1] = reps[cs[-1]][1]
152
+ reps[cs[0]+1..cs[-1]] = []
153
+ reps[cs[0]][3] = File.read "out.css"
154
+
155
+ reps.reverse_each do |from, to, header, str, qp|
156
+ str = qp ?
157
+ header + str.gsub("=", "=3D").
158
+ b.gsub(/[\x80-\xFF]/n){ |_| "=%02X" % _.ord }.
159
+ gsub(/.{73}[^=][^=](?=.)/, "\\0=\r\n") :
160
+ header + str.gsub("\n", "\r\n")
161
+ p [str.size, "to - from = #{to - from}"]
162
+ mht[from...to] = str
26
163
  end
164
+ p ::File.write "temp.mht", mht
165
+ puts "OK"
27
166
  end
167
+
28
168
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ferrum_common
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Victor Maslov aka Nakilon
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-07 00:00:00.000000000 Z
11
+ date: 2023-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ferrum
@@ -24,6 +24,34 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: browser_reposition
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
27
55
  description:
28
56
  email: nakilon@gmail.com
29
57
  executables: []