pagerecognizer 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/lib/pagerecognizer.rb +248 -0
- data/pagerecognizer.gemspec +23 -0
- data/test.rb +28 -0
- metadata +147 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 61a30cc5e39e171b8eabdf26490475ce3fb041b9
|
4
|
+
data.tar.gz: e64d2e3700730de8e9d3fe4d4de57f4743ca98b9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: cd4370f97135ac3df6376c2df1dcbb39cd25a35fac0ad172d29fa86764d8fb82d7ec28da7d4aaf0a547f1684cdda34f2d4de1b14de5c66ed784bb798643e96cd
|
7
|
+
data.tar.gz: e517075c5eb9d4efdb5bc851865136776df7dda1eacaecb96e3d89f211c63c6a5429b0ee4286280d150f7ea338662ceebdc0f9b0039d7df8a6a2778b349629f6
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2020 Victor Maslov
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,248 @@
|
|
1
|
+
module PageRecognizer
|
2
|
+
require "logger"

# Module-level logger used by the recognizer methods and error classes.
# It defaults to a Logger writing to STDOUT and can be replaced through the
# +logger=+ writer defined here.
singleton_class.send :attr_accessor, :logger
self.logger = Logger.new(STDOUT)
|
7
|
+
|
8
|
+
# Mixin for collections of node structs (anything answering +map+/+size+ whose
# items respond to +top+, +left+, +width+, +height+ and +node.tag_name+).
module Dumpable
  # Renders the collection as a standalone HTML page for visual debugging.
  #
  # Each item becomes an absolutely positioned, half-transparent <div> placed
  # at the item's coordinates and labelled with its upcased tag name. The hue
  # is spread over the collection so neighbouring items get distinct colours.
  #
  # Returns the HTML document as a String.
  def dump
    total = size
    boxes = each_with_index.map do |item, index|
      hue = 360 * index / total
      style = "position: absolute; background-color: hsla(#{hue},100%,50%,0.5); " \
              "top: #{item.top}; left: #{item.left}; width: #{item.width}; height: #{item.height}"
      "<div style='#{style}'>#{item.node.tag_name.upcase}</div>"
    end
    "<html><body>#{boxes.join}</body></html>"
  end
end
|
21
|
+
|
22
|
+
# Rebuilds a node list from HTML in the format written by Dumpable#dump.
#
# str - String of HTML; every <div> in it is turned into one node.
#
# Each node is a struct with +node+, +top+, +left+, +width+ and +height+
# members. +node+ is a stand-in struct whose +tag_name+ is the div's text, and
# the four numbers are read from the div's inline style via +to_f+ (a missing
# property, or a div without a style attribute, yields 0.0).
#
# Returns an Array of those structs, extended with Dumpable.
def self.load str
  require "nokogiri" # loaded lazily: only this method needs it
  # Both struct classes are created once per call rather than once per div,
  # so the loaded nodes share a class and can be compared with == / uniq.
  tag = Struct.new :tag_name
  box = Struct.new :node, :top, :left, :width, :height
  Nokogiri::HTML(str).css("div").map do |div|
    css = div[:style].to_s.scan(/(\S+): ([^\;]+)/).to_h
    box.new tag.new(div.text), *css.values_at("top", "left", "width", "height").map(&:to_f)
  end.extend Dumpable
end
|
31
|
+
|
32
|
+
# Collects the clickable elements found under this node, keeping only the
# innermost ones.
#
# The host object must provide +page+; its +evaluate+ runs the script below
# with this node as the argument. The script walks the element subtree and,
# for each element, scrolls it into view, reads its bounding rect (top/left
# shifted by the current scroll offsets) and marks it clickable when
# document.elementFromPoint at the rect's center returns that same element.
# <svg> elements are hit-tested with their children hidden and are not
# descended into. The initial scroll position is restored afterwards.
#
# The scan is repeated until two consecutive runs report the same number of
# elements: through Selenium::WebDriver::Wait when that constant is defined,
# otherwise by polling for up to 5 seconds.
#
# Returns an Array (extended with Dumpable) of structs with +top+, +left+,
# +width+, +height+, +clickable+ and +node+ members.
# Raises RuntimeError in the polling branch when the element count has not
# settled within 5 seconds.
def recognize
  logger = Module.nesting.first.logger

  # The script and the result struct do not change between polls, so they are
  # built once instead of on every attempt.
  code = "( function(node) {
    var x = scrollX, y = scrollY;
    var _tap = function(x, f){ f(); return x };
    var f = function(node) {
      node.scrollIntoView();
      var rect = JSON.parse(JSON.stringify(node.getBoundingClientRect()));
      var child_nodes = Array.from(node.childNodes).filter(function(node) { return node.nodeType == 1 });
      var clickable;
      if (node.nodeName == 'svg') {
        var states = child_nodes.map( function(n){
          return _tap(n.style ? n.style.display : '', function(){ n.style.display = 'none' } );
        } );
        clickable = (node === document.elementFromPoint(rect.x + rect.width/2, rect.y + rect.height/2));
        var _zip = function(a, b){ return a.map( function(e, i) { return [e, b[i]] } ) };
        _zip(child_nodes, states).forEach( function(_){ _[0].style.display = _[1] } );
      } else {
        clickable = (node === document.elementFromPoint(rect.x + rect.width/2, rect.y + rect.height/2));
      };
      rect.top += scrollY;
      rect.left += scrollX;
      return [ [
        rect.top, rect.left, rect.width, rect.height, clickable, node
      ] ].concat(node.nodeName == 'svg' ? [] : child_nodes.flatMap(f));
    };
    return _tap(f(node), function(){ scrollTo(x, y) });
  } )(arguments[0])"
  str = Struct.new :top, :left, :width, :height, :clickable, :node

  nodes = []
  # One poll: rescan and report whether the element count matched the
  # previous scan.
  try = lambda do
    prev = nodes
    nodes = page.evaluate(code, self).map{ |s| str.new(*s) }
    nodes.size == prev.size
  end

  if defined? Selenium::WebDriver::Wait
    Selenium::WebDriver::Wait.new(
      message: "number of DOM elements didn't stop to change"
    ).until(&try)
  else
    # Monotonic clock: the timeout must not be affected by wall-clock jumps.
    deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + 5
    until try.call
      fail "number of DOM elements didn't stop to change" if Process.clock_gettime(Process::CLOCK_MONOTONIC) > deadline
    end
  end
  logger.info "#{nodes.size} DOM nodes found"

  nodes.select!(&:clickable)
  # Drop every node whose box encloses another clickable node's box (and is
  # not identical to it), which leaves the innermost clickable elements.
  nodes.reject do |n|
    nodes.any? do |nn|
      cs = [
        nn.top <=> n.top,
        nn.left <=> n.left,
        n.left + n.width <=> nn.left + nn.width,
        n.top + n.height <=> nn.top + nn.height,
      ]
      cs.include?(1) && !cs.include?(-1)
    end
  end.extend Dumpable
end
|
94
|
+
|
95
|
+
# Collects every element under this node together with its geometry.
#
# The host object must provide +page+; its +evaluate+ runs the script below
# with this node as the argument. The script walks the element subtree and,
# for each element, scrolls it into view and reads its bounding rect with
# top/left shifted by the current scroll offsets. The rect is sent back as a
# JSON string next to the element itself. The initial scroll position is
# restored afterwards.
#
# The scan is repeated until two consecutive runs report the same number of
# elements: through Selenium::WebDriver::Wait when that constant is defined,
# otherwise by polling for up to 10 seconds.
#
# Returns an Array of structs with +node+, +top+, +left+, +width+ and
# +height+ members; elements with zero width or height are removed.
# Raises RuntimeError in the polling branch when the element count has not
# settled within 10 seconds.
private def recognize_more
  require "json" # made explicit; the method must not rely on another gem loading it
  logger = Module.nesting.first.logger

  # The script and the result struct do not change between polls, so they are
  # built once instead of on every attempt.
  code = "( function(node) {
    var x = scrollX, y = scrollY;
    var _tap = function(x, f){ f(); return x };
    var f = function(node) {
      node.scrollIntoView();
      var rect = JSON.parse(JSON.stringify(node.getBoundingClientRect()));
      rect.top += scrollY;
      rect.left += scrollX;
      return [ [
        node, JSON.stringify([rect.top, rect.left, rect.width, rect.height])
      ] ].concat(Array.from(node.childNodes).filter(function(node) { return node.nodeType == 1 }).flatMap(f));
    };
    return _tap(f(node), function(){ scrollTo(x, y) });
  } )(arguments[0])"
  str = Struct.new :node, :top, :left, :width, :height

  nodes = []
  # One poll: rescan and report whether the element count matched the
  # previous scan.
  try = lambda do
    prev = nodes
    # JSON.parse rather than JSON.load: the string is produced by JavaScript
    # running inside the page, and JSON.load is documented as intended for
    # trusted input only.
    nodes = page.evaluate(code, self).map{ |node, a| str.new node, *JSON.parse(a) }
    nodes.size == prev.size
  end

  if defined? Selenium::WebDriver::Wait
    Selenium::WebDriver::Wait.new(
      message: "number of DOM elements didn't stop to change"
    ).until(&try)
  else
    # Monotonic clock: the timeout must not be affected by wall-clock jumps.
    deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + 10
    until try.call
      fail "number of DOM elements didn't stop to change" if Process.clock_gettime(Process::CLOCK_MONOTONIC) > deadline
    end
  end
  logger.info "#{nodes.size} DOM nodes found"

  nodes.reject!{ |i| i.height.zero? || i.width.zero? }
  nodes
end
|
135
|
+
|
136
|
+
# Anonymous base class for recognizer errors. Creating an instance logs the
# message through the module logger and keeps an HTML rendering of every node
# list handed to it, so a failure can be inspected visually afterwards.
logging_error = Class.new(RuntimeError) do
  # Hash of name => HTML String (see Dumpable#dump), one entry per node list.
  attr_reader :dumps

  # msg    - error message.
  # arrays - name => node list pairs; each list is extended with Dumpable
  #          and rendered immediately.
  def initialize msg, arrays
    Module.nesting.first.logger.error "#{self.class}: #{msg}"
    @dumps = arrays.each_with_object({}) do |(name, array), rendered|
      rendered[name] = array.extend(Dumpable).dump
    end
    super msg
  end
end

# Raised by the splitting code when it has too few nodes to work with.
class ErrorNotEnoughNodes < logging_error
end
|
145
|
+
|
146
|
+
# Splits this node's content into a sequence of non-overlapping blocks along
# one axis; +rows+ and +cols+ are the public entry points.
#
# heuristics - Array of Symbols choosing what a candidate set of blocks is
#              scored on: :SIZE, :AREA, :WIDTH, :HEIGHT, :MIDDLE.
# hh, ww     - member names of a node's extent along / across the split axis
#              (+rows+ passes :height, :width).
# tt, ll     - member names of a node's start along / across the split axis
#              (+rows+ passes :top, :left).
#
# Steps: collect all elements (recognize_more), keep those lying within this
# node's bounding rect, drop tags listed below as non-containers and nodes
# with duplicate geometry, drop nodes nested inside another node that overlaps
# exactly the same set of nodes, then search for the best-scoring set of
# mutually non-overlapping nodes using PCBR within a time budget.
#
# Returns an Array (extended with Dumpable) of node structs that carry an
# extra +area+ member.
# Raises ErrorNotEnoughNodes when no element lies inside this node, when no
# candidate survives the filtering, or when no set of two or more blocks is
# found.
private def split heuristics, hh, ww, tt, ll
  logger = Module.nesting.first.logger
  # Monotonic clock for all time budgets below, so they are not affected by
  # wall-clock jumps.
  clock = ->{ Process.clock_gettime Process::CLOCK_MONOTONIC }

  # Runs a block, retrying for up to 10 seconds while Selenium reports stale
  # element references; a plain call when Selenium is not loaded.
  unstale = if defined? Selenium::WebDriver::Error::StaleElementReferenceError
    lambda do |&try|
      started = clock.call
      begin
        try.call
      rescue Selenium::WebDriver::Error::StaleElementReferenceError
        raise if clock.call > started + 10
        retry
      end
    end
  else
    ->(&b){ b.call }
  end

  all = unstale.call{ recognize_more }.sort_by(&tt)
  logger.info "all nodes: #{all.size}"

  # Node rects from recognize_more are shifted by the scroll offsets, so the
  # container rect is shifted the same way before comparing against them.
  rect = page.evaluate("( function(node) {
    var rect = JSON.parse(JSON.stringify(node.getBoundingClientRect()));
    rect.top += scrollY; rect.bottom += scrollY;
    rect.left += scrollX; rect.right += scrollX;
    return rect;
  } )(arguments[0])", self)
  inside = all.reject{ |i| i.left < rect["left"] || i.left + i.width > rect["right"] || i.top < rect["top"] || i.top + i.height > rect["bottom"] }
  raise ErrorNotEnoughNodes.new "no inside nodes", all: all, inside: inside if inside.empty?
  logger.info "inside nodes: #{inside.size}"

  nodes = unstale.call do
    inside.reject{ |i| %w{ button script svg path a img span }.include? i.node.tag_name }
  end.uniq{ |i| [i[hh], i[ww], i[tt], i[ll]] }
  logger.info "good nodes: #{nodes.size}" # only those that might be containers

  large = nodes # a minimum-size filter used to be applied here; currently disabled
  logger.info "large enough and unique: #{large.size}"

  # True when two nodes overlap along the split axis.
  interfere = lambda do |a, b|
    a[tt] < b[tt] + b[hh] &&
    b[tt] < a[tt] + a[hh]
  end

  # Drop a node when some other node spans it along the split axis and both
  # overlap exactly the same nodes.
  rest = large.select.with_index do |a, i|
    large.each_with_index.none? do |b, j|
      next if i == j
      a[tt] >= b[tt] && a[tt] + a[hh] <= b[tt] + b[hh] &&
      large.all?{ |c| interfere[a, c] == interfere[b, c] }
    end
  end
  logger.info "not nested: #{rest.size}"

  # Without candidates there is no struct to extend below; report it the same
  # way as a failed search instead of failing on nil.
  if large.empty?
    raise ErrorNotEnoughNodes.new "failed to split <#{tag_name}>", all: all, inside: inside, nodes: nodes, large: large, rest: rest
  end

  # adding the :area field for faster upcoming computations
  struct = Struct.new(*large.first.members, :area)
  rest.map!{ |i| struct.new(*i.values, i.width * i.height) }

  require "pcbr"
  pcbr = PCBR.new
  is = []            # indexes (into rest) of the set currently being extended
  max, past = 0, []  # best score so far; bitmasks of sets already extended
  prev = nil         # time of the last score improvement
  time = clock.call
  loop do
    # Store every one-node extension of the current set that is not in the
    # table yet.
    rest.each_with_index do |_node, i|
      next if is.any?{ |j| i == j || interfere[rest[i], rest[j]] }
      key = [*is, i].sort
      next if pcbr.table.assoc key
      sol = rest.values_at(*is, i)
      pcbr.store key, [
        *( is.size if heuristics.include? :SIZE ),
        *( sol.map(&:area).inject(:+) if heuristics.include? :AREA ),
        *( -sol.product(sol).map{ |s1, s2| (s1.width - s2.width ).abs }.inject(:+) / sol.size / sol.size if heuristics.include? :WIDTH ),
        *( -sol.product(sol).map{ |s1, s2| (s1.height - s2.height ).abs }.inject(:+) / sol.size / sol.size if heuristics.include? :HEIGHT ),
        *( -sol.product(sol).map{ |s1, s2| (s1[ll] + s1[ww] / 2.0 - s2[ll] - s2[ww] / 2.0).abs }.inject(:+) / sol.size / sol.size if heuristics.include? :MIDDLE ),
      ]
    end
    # After the first second, once no improvement has been seen for longer
    # than it took to reach the last one, stop as soon as the best multi-node
    # score is unique in the table or 5 seconds have passed overall.
    if prev && clock.call - time > 1 && (clock.call - prev > (prev - time))
      m = pcbr.table.reject{ |row| row.first.size == 1 }.map(&:last).max
      break if 1 == pcbr.table.count{ |row| row.last == m } || clock.call - time > 5
    end
    # Continue from the best-scoring set that has not been extended yet.
    break unless t = pcbr.table.reject{ |ids,| past.include? ids.map{ |i| 2**i }.inject(:+) }.max_by(&:last)
    if t.last > max
      prev, max = clock.call, t.last
      logger.debug [clock.call - time, max, t.first]
    end
    is = t.first
    past.push is.map{ |i| 2**i }.inject(:+)
  end
  # TODO: if multiple with max score, take the max by area
  unless best = pcbr.table.reject{ |ids,| ids.size == 1 }.max_by(&:last)
    raise ErrorNotEnoughNodes.new "failed to split <#{tag_name}>", all: all, inside: inside, nodes: nodes, large: large, rest: rest
  end
  rest.values_at(*best.first).extend(Dumpable)
end
|
228
|
+
|
229
|
+
# Splits this node into blocks stacked along the vertical axis.
#
# heuristics - optional scoring heuristics forwarded to +split+; when none
#              are given, AREA, HEIGHT and WIDTH are used.
#
# Returns whatever +split+ returns.
def rows *heuristics
  chosen = heuristics.empty? ? [:AREA, :HEIGHT, :WIDTH] : heuristics
  split(chosen, :height, :width, :top, :left)
end
|
233
|
+
# Splits this node into blocks laid out along the horizontal axis.
#
# heuristics - optional scoring heuristics forwarded to +split+; when none
#              are given, AREA, HEIGHT and WIDTH are used.
#
# Returns whatever +split+ returns.
def cols *heuristics
  chosen = heuristics.empty? ? [:AREA, :HEIGHT, :WIDTH] : heuristics
  split(chosen, :width, :height, :left, :top)
end
|
237
|
+
|
238
|
+
|
239
|
+
end
|
240
|
+
|
241
|
+
# Ferrum integration, applied only when Ferrum has been loaded before this
# file: every Ferrum::Node gains the recognizer methods, and
# Ferrum::Frame::Runtime#cyclic? is replaced by a version that evaluates a
# JavaScript function which always returns false.
# NOTE(review): presumably this bypasses Ferrum's own cyclic-structure check
# for values returned from the page; confirm against the Ferrum version in use.
if defined?(Ferrum::Frame::Runtime)
  Ferrum::Node.include(PageRecognizer)
  Ferrum::Frame::Runtime.class_eval do
    def cyclic?(object_id)
      @page.command(
        "Runtime.callFunctionOn",
        objectId: object_id,
        returnByValue: true,
        functionDeclaration: "function(){return false}"
      )
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Gem specification for pagerecognizer.
Gem::Specification.new do |gem|
  gem.name    = "pagerecognizer"
  gem.version = "0.0.1"
  gem.summary = "visual HTML page structure recognizer"

  gem.author   = "Victor Maslov aka Nakilon"
  gem.email    = "nakilon@gmail.com"
  gem.license  = "MIT"
  gem.metadata = {"source_code_uri" => "https://github.com/nakilon/pagerecognizer"}

  # Runtime dependencies.
  %w{ nokogiri pcbr ferrum }.each{ |name| gem.add_dependency name }
  # test.rb is written with minitest.
  gem.add_development_dependency "minitest"

  # Other development-only dependencies.
  %w{ ruby-prof byebug mll }.each{ |name| gem.add_development_dependency name }

  gem.require_path = "lib"
  gem.test_file    = "test.rb"
  gem.files        = %w{ LICENSE pagerecognizer.gemspec lib/pagerecognizer.rb }
end
|
data/test.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
require "ferrum"
|
3
|
+
require_relative "lib/pagerecognizer"
|
4
|
+
Ferrum::Node.include PageRecognizer
|
5
|
+
|
6
|
+
# End-to-end check: a saved search results page (google.htm, read from the
# current directory) is written into a blank browser page and split into
# rows; the rows sharing the most common width are each split again, and the
# link and description of every such result are compared with the expected
# list.
describe PageRecognizer do
  it "google" do
    browser = Ferrum::Browser.new **(ENV.has_key?("FERRUM_NO_SANDBOX") ? {browser_options: {"no-sandbox": nil}} : {})
    begin
      browser.goto "about:blank"
      browser.execute "document.write(#{File.read("google.htm").inspect})"
      results = browser.at_css("body").rows
      # the most frequent row width is taken to be the width of a search result
      width = results.group_by(&:width).max_by{ |w, g| g.size }.first
      assert_equal [
        ["https://www.ruby-lang.org/ru/", "Ruby это... динамический язык программирования с о"],
        ["https://ru.wikibooks.org/wiki/Ruby", "Этот учебник намерен осветить все тонкости програм"],
        ["https://habr.com/ru/post/433672/", "19 дек. 2018 г. - Взрывной рост интереса к Ruby ос"],
        ["https://habr.com/ru/hub/ruby/", "Ruby (англ. Ruby — «Рубин») — динамический, рефлек"],
        ["https://web-creator.ru/articles/ruby", "Ruby разрабатывался на Linux, но работает на многи"],
        ["http://rusrails.ru/", "Ruby on Rails руководства, учебники, статьи на рус"],
        ["https://vc.ru/dev/72391-pochemu-my-vybiraem-ruby-d", "20 июн. 2019 г. - Ruby on Rails одним из первых на"],
        ["https://tproger.ru/tag/ruby/", "Django или Ruby on Rails: какой фреймворк выбрать?"],
        ["https://rubyrussia.club/", "Главная российская конференция о Ruby. Расширяем г"]
      ], results.select{ |r| r.width == width }.map(&:node).map(&:rows).map{ |link, desc| [
        link.node.at_css("a").property("href")[0,50],
        desc.node.text[0,50],
      ] }
    ensure
      # Always shut the browser down, including when an assertion fails, so
      # the browser process is not left running after the test.
      browser.quit
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: pagerecognizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Victor Maslov aka Nakilon
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-09-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pcbr
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ferrum
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: minitest
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: ruby-prof
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: byebug
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: mll
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description:
|
112
|
+
email: nakilon@gmail.com
|
113
|
+
executables: []
|
114
|
+
extensions: []
|
115
|
+
extra_rdoc_files: []
|
116
|
+
files:
|
117
|
+
- LICENSE
|
118
|
+
- lib/pagerecognizer.rb
|
119
|
+
- pagerecognizer.gemspec
|
120
|
+
- test.rb
|
121
|
+
homepage:
|
122
|
+
licenses:
|
123
|
+
- MIT
|
124
|
+
metadata:
|
125
|
+
source_code_uri: https://github.com/nakilon/pagerecognizer
|
126
|
+
post_install_message:
|
127
|
+
rdoc_options: []
|
128
|
+
require_paths:
|
129
|
+
- lib
|
130
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
131
|
+
requirements:
|
132
|
+
- - ">="
|
133
|
+
- !ruby/object:Gem::Version
|
134
|
+
version: '0'
|
135
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - ">="
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: '0'
|
140
|
+
requirements: []
|
141
|
+
rubyforge_project:
|
142
|
+
rubygems_version: 2.5.2.3
|
143
|
+
signing_key:
|
144
|
+
specification_version: 4
|
145
|
+
summary: visual HTML page structure recognizer
|
146
|
+
test_files:
|
147
|
+
- test.rb
|