pagerecognizer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/lib/pagerecognizer.rb +248 -0
- data/pagerecognizer.gemspec +23 -0
- data/test.rb +28 -0
- metadata +147 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 61a30cc5e39e171b8eabdf26490475ce3fb041b9
|
4
|
+
data.tar.gz: e64d2e3700730de8e9d3fe4d4de57f4743ca98b9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cd4370f97135ac3df6376c2df1dcbb39cd25a35fac0ad172d29fa86764d8fb82d7ec28da7d4aaf0a547f1684cdda34f2d4de1b14de5c66ed784bb798643e96cd
|
7
|
+
data.tar.gz: e517075c5eb9d4efdb5bc851865136776df7dda1eacaecb96e3d89f211c63c6a5429b0ee4286280d150f7ea338662ceebdc0f9b0039d7df8a6a2778b349629f6
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2020 Victor Maslov
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,248 @@
|
|
1
|
+
module PageRecognizer
|
2
|
+
require "logger"

# Module-level logger used by the recognizer methods and by the logging
# error class. Read it with `.logger`, replace it with `.logger = ...`.
def self.logger
  @logger
end

def self.logger= replacement
  @logger = replacement
end

# By default everything is logged to STDOUT.
self.logger = Logger.new(STDOUT)
|
7
|
+
|
8
|
+
# Mixin for collections of node records (objects responding to #node, #top,
# #left, #width and #height) that renders them as HTML for visual debugging.
module Dumpable
  # Returns an HTML document String with one absolutely positioned,
  # half-transparent <div> per element. Each div is placed using the
  # element's top/left/width/height, labelled with the upcased tag name of
  # its node, and coloured with a hue derived from its index.
  def dump
    total = size
    boxes = each_with_index.map do |item, index|
      hue = 360 * index / total
      "<div style='position: absolute; background-color: hsla(#{hue},100%,50%,0.5); " \
      "top: #{item.top}; left: #{item.left}; width: #{item.width}; height: #{item.height}'>" \
      "#{item.node.tag_name.upcase}</div>"
    end
    "<html><body>#{boxes.join}</body></html>"
  end
end
|
21
|
+
|
22
|
+
# Rebuilds a Dumpable array of node records from HTML such as the output of
# Dumpable#dump.
#
# str - HTML source as a String; every <div> is expected to carry an inline
#       style with "top", "left", "width" and "height" entries.
#
# Returns an Array extended with Dumpable. Each element responds to #node (a
# stand-in object whose #tag_name is the div's text) and to #top, #left,
# #width and #height (Floats obtained with String#to_f from the style).
def self.load str
  require "nokogiri"  # required at call time, not when this file is loaded
  # Create the record classes once per call instead of once per <div>: all
  # returned records then share one class (Struct equality requires it) and
  # no throwaway classes are allocated inside the loop.
  record = Struct.new(*%i{ node top left width height })
  tag = Struct.new(:tag_name)
  Nokogiri::HTML(str).css("div").map do |div|
    style = div[:style].scan(/(\S+): ([^\;]+)/).to_h
    record.new tag.new(div.text), *style.values_at(*%w{ top left width height }).map(&:to_f)
  end.extend Dumpable
end
|
31
|
+
|
32
|
+
# Finds the innermost clickable elements under this node.
#
# Relies on the including object providing `page`, whose `evaluate` runs the
# JavaScript below with this node passed as arguments[0].
#
# The JavaScript walks this element and its descendant elements (children of
# an <svg> are not visited), scrolls each one into view, reads its bounding
# rectangle, and marks it clickable when document.elementFromPoint at the
# rectangle's center returns that very element. For <svg> the direct children
# are hidden (display: none) during that hit test and restored afterwards.
# The rectangle's top/left are then shifted by the current scroll offsets,
# and the initial scroll position is restored once the walk is done.
#
# Returns an Array (extended with Dumpable) of Structs with the members
# top, left, width, height, clickable and node.
# Raises RuntimeError in the non-Selenium branch if the node count still
# changes between passes after 5 seconds.
def recognize
|
33
|
+
logger = Module.nesting.first.logger
|
34
|
+
|
35
|
+
nodes = []
|
36
|
+
# One pass: re-collects every node and tells whether the number of nodes is
# the same as in the previous pass.
try = lambda do
|
37
|
+
prev = nodes
|
38
|
+
code = "( function(node) {
|
39
|
+
var x = scrollX, y = scrollY;
|
40
|
+
var _tap = function(x, f){ f(); return x };
|
41
|
+
var f = function(node) {
|
42
|
+
node.scrollIntoView();
|
43
|
+
var rect = JSON.parse(JSON.stringify(node.getBoundingClientRect()));
|
44
|
+
var child_nodes = Array.from(node.childNodes).filter(function(node) { return node.nodeType == 1 });
|
45
|
+
var clickable;
|
46
|
+
if (node.nodeName == 'svg') {
|
47
|
+
var states = child_nodes.map( function(n){
|
48
|
+
return _tap(n.style ? n.style.display : '', function(){ n.style.display = 'none' } );
|
49
|
+
} );
|
50
|
+
clickable = (node === document.elementFromPoint(rect.x + rect.width/2, rect.y + rect.height/2));
|
51
|
+
var _zip = function(a, b){ return a.map( function(e, i) { return [e, b[i]] } ) };
|
52
|
+
_zip(child_nodes, states).forEach( function(_){ _[0].style.display = _[1] } );
|
53
|
+
} else {
|
54
|
+
clickable = (node === document.elementFromPoint(rect.x + rect.width/2, rect.y + rect.height/2));
|
55
|
+
};
|
56
|
+
rect.top += scrollY;
|
57
|
+
rect.left += scrollX;
|
58
|
+
return [ [
|
59
|
+
rect.top, rect.left, rect.width, rect.height, clickable, node
|
60
|
+
] ].concat(node.nodeName == 'svg' ? [] : child_nodes.flatMap(f));
|
61
|
+
};
|
62
|
+
return _tap(f(node), function(){ scrollTo(x, y) });
|
63
|
+
} )(arguments[0])"
|
64
|
+
str = Struct.new :top, :left, :width, :height, :clickable, :node
|
65
|
+
nodes = page.evaluate(code, self).map{ |s| str.new *s }
|
66
|
+
nodes.size == prev.size
|
67
|
+
end
|
68
|
+
|
69
|
+
# Repeat passes until two consecutive ones see the same number of nodes:
# through Selenium's Wait helper when it is loaded, otherwise in a plain
# loop that gives up after 5 seconds.
if defined? Selenium::WebDriver::Wait
|
70
|
+
Selenium::WebDriver::Wait.new(
|
71
|
+
message: "number of DOM elements didn't stop to change"
|
72
|
+
).until &try
|
73
|
+
else
|
74
|
+
t = Time.now
|
75
|
+
until try.call
|
76
|
+
fail "number of DOM elements didn't stop to change" if Time.now > t + 5
|
77
|
+
end
|
78
|
+
end
|
79
|
+
logger.info "#{nodes.size} DOM nodes found"
|
80
|
+
|
81
|
+
# Keep the clickable nodes only, then drop every node whose rectangle
# contains the rectangle of another remaining node without being equal to it.
nodes.select! &:clickable
|
82
|
+
nodes.reject do |n|
|
83
|
+
nodes.any? do |nn|
|
84
|
+
# Per side: 1 when nn lies strictly inside n, 0 when the edges coincide,
# -1 when nn sticks out of n.
cs = [
|
85
|
+
nn.top <=> n.top,
|
86
|
+
nn.left <=> n.left,
|
87
|
+
n.left + n.width <=> nn.left + nn.width,
|
88
|
+
n.top + n.height <=> nn.top + nn.height,
|
89
|
+
]
|
90
|
+
cs.include?(1) && !cs.include?(-1)
|
91
|
+
end
|
92
|
+
end.extend Dumpable
|
93
|
+
end
|
94
|
+
|
95
|
+
# Collects this element and all of its descendant elements together with
# their rectangles; unlike #recognize there is no clickability test and
# <svg> children are visited too.
#
# Relies on the including object providing `page`, whose `evaluate` runs the
# JavaScript below with this node passed as arguments[0]. For every element
# the script scrolls it into view, reads its bounding rectangle, shifts
# top/left by the current scroll offsets and returns the node plus the
# rectangle encoded as a JSON string; the initial scroll position is restored
# at the end.
#
# Returns an Array of Structs with the members node, top, left, width and
# height; entries with zero width or zero height are removed.
# Raises RuntimeError in the non-Selenium branch if the node count still
# changes between passes after 10 seconds.
#
# NOTE(review): JSON is used below but this file never requires "json";
# presumably the browser driver has already loaded it -- confirm.
private def recognize_more
|
96
|
+
logger = Module.nesting.first.logger
|
97
|
+
|
98
|
+
nodes = []
|
99
|
+
# One pass: re-collects every node and tells whether the number of nodes is
# the same as in the previous pass.
try = lambda do
|
100
|
+
prev = nodes
|
101
|
+
code = "( function(node) {
|
102
|
+
var x = scrollX, y = scrollY;
|
103
|
+
var _tap = function(x, f){ f(); return x };
|
104
|
+
var f = function(node) {
|
105
|
+
node.scrollIntoView();
|
106
|
+
var rect = JSON.parse(JSON.stringify(node.getBoundingClientRect()));
|
107
|
+
rect.top += scrollY;
|
108
|
+
rect.left += scrollX;
|
109
|
+
return [ [
|
110
|
+
node, JSON.stringify([rect.top, rect.left, rect.width, rect.height])
|
111
|
+
] ].concat(Array.from(node.childNodes).filter(function(node) { return node.nodeType == 1 }).flatMap(f));
|
112
|
+
};
|
113
|
+
return _tap(f(node), function(){ scrollTo(x, y) });
|
114
|
+
} )(arguments[0])"
|
115
|
+
str = Struct.new :node, :top, :left, :width, :height
|
116
|
+
nodes = page.evaluate(code, self).map{ |node, a| str.new node, *JSON.load(a) }
|
117
|
+
nodes.size == prev.size
|
118
|
+
end
|
119
|
+
|
120
|
+
# Repeat passes until two consecutive ones see the same number of nodes:
# through Selenium's Wait helper when it is loaded, otherwise in a plain
# loop that gives up after 10 seconds.
if defined? Selenium::WebDriver::Wait
|
121
|
+
Selenium::WebDriver::Wait.new(
|
122
|
+
message: "number of DOM elements didn't stop to change"
|
123
|
+
).until &try
|
124
|
+
else
|
125
|
+
t = Time.now
|
126
|
+
until try.call
|
127
|
+
fail "number of DOM elements didn't stop to change" if Time.now > t + 10
|
128
|
+
end
|
129
|
+
end
|
130
|
+
logger.info "#{nodes.size} DOM nodes found"
|
131
|
+
|
132
|
+
# Nodes without any width or height are dropped from the result.
nodes.reject!{ |i| i.height.zero? || i.width.zero? }
|
133
|
+
nodes
|
134
|
+
end
|
135
|
+
|
136
|
+
# Base class for errors that log themselves and carry HTML dumps of the node
# lists involved. It is held in a local variable only, so it never becomes a
# named constant of the module.
logging_error = Class.new RuntimeError do
  # Hash mapping each given name to the HTML produced by Dumpable#dump.
  attr_reader :dumps

  # msg    - error message; it is also written to the module logger at error
  #          level, prefixed with the error class.
  # arrays - name => Array pairs; every array is extended with Dumpable and
  #          rendered right away.
  def initialize msg, arrays
    Module.nesting.first.logger.error "#{self.class}: #{msg}"
    @dumps = {}
    arrays.each do |name, array|
      @dumps[name] = array.extend(Dumpable).dump
    end
    super msg
  end
end

# Raised by the splitting code when it cannot find enough usable nodes.
class ErrorNotEnoughNodes < logging_error ; end
|
145
|
+
|
146
|
+
# Splits the content of this element into boxes that do not overlap along one
# axis and returns the best-scoring combination of them.
#
# heuristics - Array of Symbols selecting the score components; the values
#              checked below are :SIZE, :AREA, :WIDTH, :HEIGHT and :MIDDLE.
# hh, ww     - member names of a box's extent along / across the split axis
#              (#rows passes :height, :width; #cols passes :width, :height).
# tt, ll     - member names of a box's start coordinate along / across the
#              split axis (#rows passes :top, :left; #cols passes :left, :top).
#
# Returns an Array (extended with Dumpable) of Structs that have the members
# of the #recognize_more records plus :area.
# Raises ErrorNotEnoughNodes when no node lies inside this element, when no
# candidate node is left after filtering, or when no combination of at least
# two boxes was found.
private def split heuristics, hh, ww, tt, ll
  logger = Module.nesting.first.logger

  # Runs a block; with Selenium loaded it is retried for up to 10 seconds
  # while it raises StaleElementReferenceError.
  unstale = if defined? Selenium::WebDriver::Error::StaleElementReferenceError
    lambda do |&try|
      t = Time.now
      begin
        try.call
      rescue Selenium::WebDriver::Error::StaleElementReferenceError
        raise if Time.now > t + 10
        retry
      end
    end
  else
    ->(&b){ b.call }
  end
  all = unstale.call do recognize_more end.sort_by(&tt)
  logger.info "all nodes: #{all.size}"
  # NOTE(review): this rectangle is taken as-is from getBoundingClientRect,
  # while #recognize_more adds the scroll offsets to top/left -- the two may
  # disagree on a scrolled page; confirm.
  rect = page.evaluate("( function(node) { return JSON.parse(JSON.stringify(node.getBoundingClientRect())) } )(arguments[0])", self)
  inside = all.reject{ |i| i.left < rect["left"] || i.left + i.width > rect["right"] || i.top < rect["top"] || i.top + i.height > rect["bottom"] }
  raise ErrorNotEnoughNodes.new "no inside nodes", all: all, inside: inside if inside.empty?
  logger.info "inside nodes: #{inside.size}"
  nodes = unstale.call do inside.reject{ |i| %w{ button script svg path a img span }.include? i.node.tag_name } end.uniq{ |i| [i[hh], i[ww], i[tt], i[ll]] }
  logger.info "good nodes: #{nodes.size}"   # only those that might be containers

  large = nodes#.select{ |i| i[ww] > nodes.map(&ww).max / 4 }
  logger.info "large enough and unique: #{large.size}"
  # Without this guard `large.first.members` below would fail with a
  # NoMethodError on nil when every inside node has been filtered out.
  raise ErrorNotEnoughNodes.new "no good nodes", all: all, inside: inside, nodes: nodes, large: large if large.empty?

  # Two boxes interfere when their ranges along the split axis overlap.
  interfere = lambda do |a, b|
    a[tt] < b[tt] + b[hh] &&
    b[tt] < a[tt] + a[hh]
  end

  # Drop a box when another box covers its range along the split axis and
  # both interfere with exactly the same boxes.
  rest = large.select.with_index do |a, i|
    large.each_with_index.none? do |b, j|
      next if i == j
      a[tt] >= b[tt] && a[tt] + a[hh] <= b[tt] + b[hh] &&
      large.all?{ |c| interfere[a, c] == interfere[b, c] }
    end
  end
  logger.info "not nested: #{rest.size}"
  # rest = rest.sample 50

  # adding the :area field for faster upcoming computations
  struct = Struct.new *large.first.members, :area
  rest.map!{ |i| struct.new *i.values, i.width * i.height }

  require "pcbr"
  pcbr = PCBR.new
  is = []             # indexes (into rest) of the combination being extended
  max, past = 0, []   # best score so far; bitmasks of combinations already extended
  prev = nil          # time of the latest score improvement
  time = Time.now
  loop do
    # Extend the current combination by every box that does not interfere
    # with it, and store a score vector for each new combination.
    rest.each_with_index do |node, i|
      next if is.any?{ |j| i == j || interfere[rest[i], rest[j]] }
      sol = rest.values_at *is, i
      pcbr.store [*is, i].sort, [
        *( is.size if heuristics.include? :SIZE ),
        *( sol.map(&:area).inject(:+) if heuristics.include? :AREA ),
        *( -sol.product(sol).map{ |s1, s2| (s1.width - s2.width ).abs }.inject(:+) / sol.size / sol.size if heuristics.include? :WIDTH ),
        *( -sol.product(sol).map{ |s1, s2| (s1.height - s2.height ).abs }.inject(:+) / sol.size / sol.size if heuristics.include? :HEIGHT ),
        *( -sol.product(sol).map{ |s1, s2| (s1[ll] + s1[ww] / 2.0 - s2[ll] - s2[ww] / 2.0).abs }.inject(:+) / sol.size / sol.size if heuristics.include? :MIDDLE ),
      ] unless pcbr.table.assoc [*is, i].sort
    end
    # After more than a second overall, once the time since the latest
    # improvement exceeds the time it took to reach it: stop when a single
    # multi-box combination holds the top score or 5 seconds have passed.
    if prev && Time.now - time > 1 && (Time.now - prev > (prev - time))
      m = pcbr.table.reject{ |i| i.first.size == 1 }.map(&:last).max
      break if 1 == pcbr.table.count{ |i| i.last == m } || Time.now - time > 5
    end
    # Continue from the best-scored combination that was not extended yet.
    break unless t = pcbr.table.reject{ |is,| past.include? is.map{ |i| 2**i }.inject(:+) }.max_by(&:last)
    if t.last > max
      prev, max = Time.now, t.last
      logger.debug [Time.now - time, max, t.first]
    end
    past.push (is = t.first).map{ |i| 2**i }.inject(:+)
  end
  # TODO: if multiple with max score, take the max by area
  unless best = pcbr.table.reject{ |is,| is.size == 1 }.max_by(&:last)
    raise ErrorNotEnoughNodes.new "failed to split <#{tag_name}>", all: all, inside: inside, nodes: nodes, large: large, rest: rest
  end
  rest.values_at(*best.first).extend(Dumpable)
end
|
228
|
+
|
229
|
+
# Splits this element into rows: delegates to #split with :height/:top as
# the members along the split axis and :width/:left across it.
#
# heuristics - optional list of heuristic Symbols; when none are given,
#              AREA, HEIGHT and WIDTH are used.
def rows *heuristics
  chosen = heuristics.empty? ? %i{ AREA HEIGHT WIDTH } : heuristics
  split chosen, :height, :width, :top, :left
end
|
233
|
+
# Splits this element into columns: delegates to #split with :width/:left as
# the members along the split axis and :height/:top across it.
#
# heuristics - optional list of heuristic Symbols; when none are given,
#              AREA, HEIGHT and WIDTH are used.
def cols *heuristics
  chosen = heuristics.empty? ? %i{ AREA HEIGHT WIDTH } : heuristics
  split chosen, :width, :height, :left, :top
end
|
237
|
+
|
238
|
+
|
239
|
+
end
|
240
|
+
|
241
|
+
# Ferrum integration, applied only when Ferrum::Frame::Runtime is already
# defined at the time this file is loaded: PageRecognizer is mixed into
# Ferrum::Node, and Ferrum::Frame::Runtime#cyclic? is redefined to call a
# JavaScript function that just returns false.
# NOTE(review): presumably this bypasses Ferrum's own cyclic-structure check
# for returned objects -- confirm against the Ferrum version in use.
if defined? Ferrum::Frame::Runtime
  Ferrum::Node.include PageRecognizer
  Ferrum::Frame::Runtime.class_eval do
    def cyclic? object_id
      @page.command(
        "Runtime.callFunctionOn",
        objectId: object_id,
        returnByValue: true,
        functionDeclaration: "function(){return false}"
      )
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Gem specification of pagerecognizer: identity, authorship, dependencies and
# the list of packaged files.
Gem::Specification.new do |gem|
  gem.name     = "pagerecognizer"
  gem.version  = "0.0.1"
  gem.summary  = "visual HTML page structure recognizer"

  gem.authors  = ["Victor Maslov aka Nakilon"]
  gem.email    = "nakilon@gmail.com"
  gem.license  = "MIT"
  gem.metadata = {"source_code_uri" => "https://github.com/nakilon/pagerecognizer"}

  # runtime dependencies
  %w{ nokogiri pcbr ferrum }.each do |lib|
    gem.add_dependency lib
  end
  # development-only dependencies
  %w{ minitest ruby-prof byebug mll }.each do |lib|
    gem.add_development_dependency lib
  end

  gem.require_paths = ["lib"]
  gem.test_files    = ["test.rb"]
  gem.files         = %w{ LICENSE pagerecognizer.gemspec lib/pagerecognizer.rb }
end
|
data/test.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "ferrum"
|
3
|
+
require_relative "lib/pagerecognizer"
|
4
|
+
Ferrum::Node.include PageRecognizer
|
5
|
+
|
6
|
+
# End-to-end check against a saved search results page ("google.htm", read
# relative to the current directory): <body> is split into rows, the rows of
# the most common width are kept, each of them is split into rows again, and
# the first 50 characters of the link href and of the description text are
# compared with the expected list.
describe PageRecognizer do
  it "google" do
    browser = Ferrum::Browser.new **(ENV.has_key?("FERRUM_NO_SANDBOX") ? {browser_options: {"no-sandbox": nil}} : {})
    begin
      browser.goto "about:blank"
      browser.execute "document.write(#{File.read("google.htm").inspect})"
      results = browser.at_css("body").rows
      width = results.group_by(&:width).max_by{ |w, g| g.size }.first
      assert_equal [
        ["https://www.ruby-lang.org/ru/", "Ruby это... динамический язык программирования с о"],
        ["https://ru.wikibooks.org/wiki/Ruby", "Этот учебник намерен осветить все тонкости програм"],
        ["https://habr.com/ru/post/433672/", "19 дек. 2018 г. - Взрывной рост интереса к Ruby ос"],
        ["https://habr.com/ru/hub/ruby/", "Ruby (англ. Ruby — «Рубин») — динамический, рефлек"],
        ["https://web-creator.ru/articles/ruby", "Ruby разрабатывался на Linux, но работает на многи"],
        ["http://rusrails.ru/", "Ruby on Rails руководства, учебники, статьи на рус"],
        ["https://vc.ru/dev/72391-pochemu-my-vybiraem-ruby-d", "20 июн. 2019 г. - Ruby on Rails одним из первых на"],
        ["https://tproger.ru/tag/ruby/", "Django или Ruby on Rails: какой фреймворк выбрать?"],
        ["https://rubyrussia.club/", "Главная российская конференция о Ruby. Расширяем г"]
      ], results.select{ |r| r.width == width }.map(&:node).map(&:rows).map{ |link, desc| [
        link.node.at_css("a").property("href")[0,50],
        desc.node.text[0,50],
      ] }
    ensure
      # Shut the browser down explicitly, also when an assertion fails,
      # instead of leaving the Chrome process to be cleaned up at exit.
      browser.quit
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pagerecognizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Victor Maslov aka Nakilon
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-09-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pcbr
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ferrum
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: ruby-prof
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: byebug
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: mll
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description:
|
112
|
+
email: nakilon@gmail.com
|
113
|
+
executables: []
|
114
|
+
extensions: []
|
115
|
+
extra_rdoc_files: []
|
116
|
+
files:
|
117
|
+
- LICENSE
|
118
|
+
- lib/pagerecognizer.rb
|
119
|
+
- pagerecognizer.gemspec
|
120
|
+
- test.rb
|
121
|
+
homepage:
|
122
|
+
licenses:
|
123
|
+
- MIT
|
124
|
+
metadata:
|
125
|
+
source_code_uri: https://github.com/nakilon/pagerecognizer
|
126
|
+
post_install_message:
|
127
|
+
rdoc_options: []
|
128
|
+
require_paths:
|
129
|
+
- lib
|
130
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
131
|
+
requirements:
|
132
|
+
- - ">="
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
version: '0'
|
135
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - ">="
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '0'
|
140
|
+
requirements: []
|
141
|
+
rubyforge_project:
|
142
|
+
rubygems_version: 2.5.2.3
|
143
|
+
signing_key:
|
144
|
+
specification_version: 4
|
145
|
+
summary: visual HTML page structure recognizer
|
146
|
+
test_files:
|
147
|
+
- test.rb
|