qnd_html2page 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/lib/qnd_html2page.rb +97 -0
- data.tar.gz.sig +0 -0
- metadata +90 -0
- metadata.gz.sig +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cd082fe1f0ecffa42aa3f42ad0c8825aa71505d3c1dfb148cbfdc209c2871836
|
4
|
+
data.tar.gz: 507203ae52228dd19d1b68e554399c0fbfe4c4baf6fec969b46d515b28c1b937
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0f16a5c52a7afee1a6a3ea478a7fda44b6aef7950b75ad96d60fea5a4c838b4e4c10c681af8e2714ea5907686bb6d6a611296d4c5840f02195e0c592e7ccd37a
|
7
|
+
data.tar.gz: af07a61b492b13f794d7e4d5572bed280876434da1c95de6a96dc0612828384b7d90fe2dfdf7ac8490e8cfadeffddb373ae0f8054a5fc60df1e9193f135c190f
|
checksums.yaml.gz.sig
ADDED
Binary file
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: qnd_html2page.rb
|
4
|
+
|
5
|
+
require 'rexle'
|
6
|
+
require 'ferrum'
|
7
|
+
require 'tempfile'
|
8
|
+
require 'rxfhelper'
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
# Splits an HTML document into a list of pages ("quick and dirty").
#
# A numbered marker <span> is inserted after each element of the body, the
# document is rendered in a headless browser (Ferrum) to read the vertical
# position of every marker, and the top-level body elements are then grouped
# into <div> pages no taller than +pg_height+.
#
# Known limitation (unchanged from the original design): pages are assembled
# from the *direct* children of <body> only, so a page boundary that falls on
# a marker nested inside another element cannot be honoured precisely.
class QndHtml2Page

  # Value of the class attribute carried by the temporary marker spans.
  MARKER_CLASS = 'qndhtml2pg'

  # Element names which never receive a marker after them.
  IGNORE_LIST = %w(span b li tr td dt dd em strong i a).freeze

  # @return [Array<Rexle::Element>] one 'div' element per page
  attr_reader :to_pages

  # @param html      passed to RXFHelper.read; the first item of its result
  #                  is used as the HTML source
  # @param debug     [Boolean] print diagnostic output to stdout when true
  # @param pg_height [Numeric] maximum page height, compared against the
  #                  marker positions reported by the browser
  def initialize(html, debug: false, pg_height: 676)

    @html, @height, @debug = html, pg_height, debug
    @to_pages = scan(RXFHelper.read(@html).first)

  end

  private

  # Runs the whole pipeline and returns the list of page elements.
  def scan(html)

    doc = Rexle.new(html)
    insert_markers(doc.root.element('body'))

    groups = paginate(marker_positions(doc.root.xml))
    puts 'a2: ' + groups.inspect if @debug

    build_pages(doc.root.element('body').elements.to_a, groups)

  end

  # Adds a numbered marker span after every element in the body, except
  # those named in IGNORE_LIST.
  def insert_markers(body)

    count = 0

    body.each_recursive do |e|

      puts 'e: ' + e.name if @debug
      next if IGNORE_LIST.include? e.name

      span = Rexle::Element.new('span').add_text(count.to_s)
      span.attributes[:class] = MARKER_CLASS
      e.insert_after span
      count += 1

    end

  end

  # Renders the XML in a headless browser and returns an array of
  # [marker text, y position] pairs, one per marker span.
  def marker_positions(xml)

    # The '.html' suffix lets the browser treat the file as HTML, and lets
    # Tempfile own (and later delete) the very file that is rendered.
    tmpfile = Tempfile.new(['browser', '.html'])
    browser = nil

    begin

      File.write tmpfile.path, xml

      browser = Ferrum::Browser.new
      browser.goto('file://' + tmpfile.path)

      # Select only our own markers; the document may contain other spans.
      browser.xpath("//span[@class='#{MARKER_CLASS}']").map do |node|
        [node.text, node.find_position.last]
      end

    ensure
      # Always release the browser process and the temporary file.
      browser.quit if browser
      tmpfile.close!
    end

  end

  # Groups the [text, y] pairs into pages: a new page starts whenever a
  # marker lies at or beyond the current page's start plus the page height.
  def paginate(positions)

    offset = 0
    groups = [[]]

    positions.each do |pos|

      puts 'r: ' + groups.inspect if @debug
      puts 'x: ' + pos.inspect if @debug

      if pos.last < offset + @height
        groups.last << pos
      else
        offset = pos.last
        groups << [pos]
      end

    end

    # The seed group stays empty when the first marker is already past the
    # page height (or there are no markers at all); drop it so every
    # remaining group has a last marker.
    groups.reject(&:empty?)

  end

  # Builds one 'div' per group, filling it with the top-level elements
  # which precede the group's last marker.
  def build_pages(elements, groups)

    puts 'elements.length: ' + elements.length.inspect if @debug
    offset = 0

    groups.map do |group|

      id = group.last.first

      puts 'id: ' + id.inspect if @debug
      puts 'offset: ' + offset.inspect if @debug

      chunk = elements[offset..-1].take_while do |e|
        puts 'e.text: ' + e.text.inspect if @debug
        !(marker?(e) && e.text == id)
      end

      # Advance (not reset) the running index so that the next page
      # continues where this one stopped.
      offset += chunk.length

      div = Rexle::Element.new 'div'
      chunk.reject {|e| marker?(e) }.each {|e| div.add e }
      div

    end

  end

  # True when the element is one of the marker spans added by insert_markers.
  def marker?(e)
    e.name == 'span' && e.attributes[:class] == MARKER_CLASS
  end

end
|
data.tar.gz.sig
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: qnd_html2page
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkxMTE1MTAzMzE4WhcN
|
15
|
+
MjAxMTE0MTAzMzE4WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDja3r/
|
17
|
+
qSzQNOzRg4qp9obwEq37YTr0gCLF5QCiAyjhfBqxv/OBPg7b+06PyOPShMkIbLYy
|
18
|
+
gnRhgzvDvDf2kpS/9ziE2bmOCp8MimWS0I0aT77qbEho6knw5BfEsPEZfG5oVtmy
|
19
|
+
au5iPivNst1sOKMTw6v2OMeiXx0a5fxZJtMnKr8/w46HKB3bpZ9m2+JDnx7cZ+Fh
|
20
|
+
RF0XTuQz79uHv1T+4oTU/DrBRqqlIASDVPmeyfOlkwTPQcPE875LclbDoBWz5vMT
|
21
|
+
kkTETVHbgB+D6i6mV9Sp896/noPP8JH84D+NvqgUppbRdUTiT+WnCFTV4ooJkACq
|
22
|
+
WC5twvtJuuvMOv7WsfsTqdZLJOm1g19JU9V7xYNxFbHQNHoLNomRKZ+NYjESkhVV
|
23
|
+
U4w4ILlvPHLfEPgeZlVGSa835XWabBwNVLXLUW/ZmlEQTz+u8isbtAk7kKbWaPUF
|
24
|
+
mEHhDcc2UG6l7uGe8E/jjOI8UufkuDFr/06QTf/azeu4AHWzGCG22MdhX3ECAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUNwHbSmuk
|
26
|
+
gigzYYMStlUkMnT8CzcwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEADCXJFuzQ8G2QrTRxJUJiWZq8ayjfFHfhr5cqvxUy
|
29
|
+
ZZ6CsT02ffAU6W7116mIBTPLD1tS5ZAws8fJ3gs+vSLJAYTteUX2ykyJLlF+cjZz
|
30
|
+
i1T2z6R6VWPNAm4wcAdY96f3Dgq/2g3lq8xdwf26XITYFKGiqMnE9IGJ+J6V7+vr
|
31
|
+
szMrxeV30iUhoeSNuM57pdDIIMWbSLSavUeoqcuTYVa9zSqz5LBdk85ibf637NbT
|
32
|
+
A0dlTm3pbiomqQIVGoRr/Gj1Dz2/dHzndXS3k8hg4iIvrMLRymIghh7AZrJcMnpM
|
33
|
+
YSVGl4kHZBGFTnkgJEnyjA0FGgDJcyJEzQJyTWzjO3TkX8pYvwyKk6ILLBHANAIQ
|
34
|
+
IHFo8pGEa/bYN7Si64eTfNtXBnEHQC1A15AvXtzjsBtqIA2e1gsWCbG83s2M9GoC
|
35
|
+
iHadjaBK6nUXzIZ2OCEldp3dzkozgJxu7tcb1Kmr9uAUFoot4w6yl+Kr4JZIW1ml
|
36
|
+
u3jdsCCQLM9dC78qib2fY8vh
|
37
|
+
-----END CERTIFICATE-----
|
38
|
+
date: 2019-11-15 00:00:00.000000000 Z
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: rxfhelper
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0.9'
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.9.4
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0.9'
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 0.9.4
|
60
|
+
description:
|
61
|
+
email: james@jamesrobertson.eu
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- lib/qnd_html2page.rb
|
67
|
+
homepage: https://github.com/jrobertson/qnd_html2page
|
68
|
+
licenses:
|
69
|
+
- MIT
|
70
|
+
metadata: {}
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubygems_version: 3.0.3
|
87
|
+
signing_key:
|
88
|
+
specification_version: 4
|
89
|
+
summary: Splits HTML into pages suitable for reading like a book.
|
90
|
+
test_files: []
|
metadata.gz.sig
ADDED
Binary file
|