qnd_html2page 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- checksums.yaml.gz.sig +0 -0
- data/lib/qnd_html2page.rb +97 -0
- data.tar.gz.sig +0 -0
- metadata +90 -0
- metadata.gz.sig +0 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cd082fe1f0ecffa42aa3f42ad0c8825aa71505d3c1dfb148cbfdc209c2871836
|
4
|
+
data.tar.gz: 507203ae52228dd19d1b68e554399c0fbfe4c4baf6fec969b46d515b28c1b937
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0f16a5c52a7afee1a6a3ea478a7fda44b6aef7950b75ad96d60fea5a4c838b4e4c10c681af8e2714ea5907686bb6d6a611296d4c5840f02195e0c592e7ccd37a
|
7
|
+
data.tar.gz: af07a61b492b13f794d7e4d5572bed280876434da1c95de6a96dc0612828384b7d90fe2dfdf7ac8490e8cfadeffddb373ae0f8054a5fc60df1e9193f135c190f
|
checksums.yaml.gz.sig
ADDED
Binary file
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# file: qnd_html2page.rb
|
4
|
+
|
5
|
+
require 'rexle'
|
6
|
+
require 'ferrum'
|
7
|
+
require 'tempfile'
|
8
|
+
require 'rxfhelper'
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
# Splits an HTML document into a list of page-sized <div> elements.
#
# A numbered marker <span> is inserted after every non-ignored element in
# the <body>. The marked-up document is rendered in a headless browser
# (Ferrum) to obtain the y coordinate of each marker, the markers are
# grouped into runs that fit within +pg_height+, and the top-level body
# elements are then dealt out to one <div> per run.
#
# NOTE(review): page boundaries are located among the *top-level* body
# elements only. If the last marker of a page is nested inside another
# element, the boundary is not found and that page takes every remaining
# element - this matches the original design; confirm it is acceptable.
class QndHtml2Page

  # Tags that never receive a marker. 'span' must stay in this list because
  # the markers themselves are spans inserted while the tree is traversed.
  IGNORED_TAGS = %w(span b li tr td dt dd em strong i a).freeze

  # Value of the class attribute that identifies the inserted marker spans.
  MARKER_CLASS = 'qndhtml2pg'

  # Array of Rexle::Element <div> objects, one per page.
  attr_reader :to_pages

  # html      - anything RXFHelper.read accepts; the first item it returns
  #             is used as the HTML string.
  # debug     - when true, progress information is written to stdout.
  # pg_height - page height, in the units reported by the browser for
  #             element positions (presumably pixels).
  def initialize(html, debug: false, pg_height: 676)

    @html, @height, @debug = html, pg_height, debug
    @to_pages = scan(RXFHelper.read(@html).first)

  end

  private

  # Runs the whole pipeline for the given HTML string and returns the
  # array of page <div> elements.
  def scan(html)

    doc = Rexle.new(html)
    insert_markers(doc.root.element('body'))

    groups = group_into_pages(measure_markers(doc))

    elements = doc.root.element('body').elements.to_a
    puts 'elements.length: ' + elements.length.inspect if @debug
    puts 'a2: ' + groups.inspect if @debug

    slice_pages(elements, groups).map do |page_elements|
      div = Rexle::Element.new 'div'
      page_elements.each {|e| div.add e}
      div
    end

  end

  # Adds a numbered marker span after every non-ignored element in body.
  def insert_markers(body)

    count = 0

    body.each_recursive do |e|

      puts 'e: ' + e.name if @debug
      next if IGNORED_TAGS.include? e.name

      span = Rexle::Element.new('span').add_text(count.to_s)
      span.attributes[:class] = MARKER_CLASS
      e.insert_after span
      count += 1

    end

  end

  # Renders the document in a headless browser and returns an array of
  # [marker_text, y_position] pairs, one per marker span.
  #
  # Only spans carrying MARKER_CLASS are measured, so spans that were
  # already present in the source document cannot be mistaken for markers.
  def measure_markers(doc)

    # The '.html' suffix makes the browser treat the file as HTML.
    tmpfile = Tempfile.new(['browser', '.html'])
    tmpfile.write doc.root.xml
    tmpfile.close

    browser = Ferrum::Browser.new
    browser.goto('file://' + tmpfile.path)

    # Positions must be read before the browser is shut down.
    browser.xpath("//span[@class='#{MARKER_CLASS}']").map do |x|
      [x.text, x.find_position.last]
    end

  ensure
    # Always release the browser process and remove the temporary file.
    browser.quit if browser
    tmpfile.close! if tmpfile
  end

  # Groups [marker_text, y] pairs into consecutive runs that each fit
  # within @height. A new run starts at the first marker whose y is at
  # least @height below the start of the current run. Empty runs (no
  # markers at all, or a first marker already past the first page) are
  # dropped so every returned group has at least one marker.
  def group_into_pages(positions)

    page_top = 0

    groups = positions.each_with_object([[]]) do |x, r|

      puts 'r: ' + r.inspect if @debug
      puts 'x: ' + x.inspect if @debug

      if x.last < page_top + @height
        r.last << x
      else
        page_top = x.last
        r << [x]
      end

    end

    groups.reject(&:empty?)

  end

  # Deals the top-level elements out to pages. For each group the elements
  # up to (not including) the group's final marker form one page; marker
  # spans are removed from the result. Returns an array of element arrays.
  def slice_pages(elements, groups)

    offset = 0

    groups.map do |group|

      id = group.last.first

      puts 'id: ' + id.inspect if @debug
      puts 'offset: ' + offset.inspect if @debug

      # drop (rather than elements[offset..-1]) yields [] instead of nil
      # once offset has moved past the end of the list.
      chunk = elements.drop(offset).take_while do |e|
        puts 'e.text: ' + e.text.inspect if @debug
        !(marker?(e) && e.text == id)
      end

      # Advance cumulatively, skipping the boundary marker itself.
      offset += chunk.length + 1

      chunk.reject {|e| marker?(e) }

    end

  end

  # True when e is one of the marker spans inserted by insert_markers.
  def marker?(e)
    e.name == 'span' && e.attributes[:class] == MARKER_CLASS
  end

end
|
data.tar.gz.sig
ADDED
Binary file
|
metadata
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: qnd_html2page
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- James Robertson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain:
|
11
|
+
- |
|
12
|
+
-----BEGIN CERTIFICATE-----
|
13
|
+
MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
|
14
|
+
YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkxMTE1MTAzMzE4WhcN
|
15
|
+
MjAxMTE0MTAzMzE4WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
|
16
|
+
cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDja3r/
|
17
|
+
qSzQNOzRg4qp9obwEq37YTr0gCLF5QCiAyjhfBqxv/OBPg7b+06PyOPShMkIbLYy
|
18
|
+
gnRhgzvDvDf2kpS/9ziE2bmOCp8MimWS0I0aT77qbEho6knw5BfEsPEZfG5oVtmy
|
19
|
+
au5iPivNst1sOKMTw6v2OMeiXx0a5fxZJtMnKr8/w46HKB3bpZ9m2+JDnx7cZ+Fh
|
20
|
+
RF0XTuQz79uHv1T+4oTU/DrBRqqlIASDVPmeyfOlkwTPQcPE875LclbDoBWz5vMT
|
21
|
+
kkTETVHbgB+D6i6mV9Sp896/noPP8JH84D+NvqgUppbRdUTiT+WnCFTV4ooJkACq
|
22
|
+
WC5twvtJuuvMOv7WsfsTqdZLJOm1g19JU9V7xYNxFbHQNHoLNomRKZ+NYjESkhVV
|
23
|
+
U4w4ILlvPHLfEPgeZlVGSa835XWabBwNVLXLUW/ZmlEQTz+u8isbtAk7kKbWaPUF
|
24
|
+
mEHhDcc2UG6l7uGe8E/jjOI8UufkuDFr/06QTf/azeu4AHWzGCG22MdhX3ECAwEA
|
25
|
+
AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUNwHbSmuk
|
26
|
+
gigzYYMStlUkMnT8CzcwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
27
|
+
c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
|
28
|
+
BgkqhkiG9w0BAQsFAAOCAYEADCXJFuzQ8G2QrTRxJUJiWZq8ayjfFHfhr5cqvxUy
|
29
|
+
ZZ6CsT02ffAU6W7116mIBTPLD1tS5ZAws8fJ3gs+vSLJAYTteUX2ykyJLlF+cjZz
|
30
|
+
i1T2z6R6VWPNAm4wcAdY96f3Dgq/2g3lq8xdwf26XITYFKGiqMnE9IGJ+J6V7+vr
|
31
|
+
szMrxeV30iUhoeSNuM57pdDIIMWbSLSavUeoqcuTYVa9zSqz5LBdk85ibf637NbT
|
32
|
+
A0dlTm3pbiomqQIVGoRr/Gj1Dz2/dHzndXS3k8hg4iIvrMLRymIghh7AZrJcMnpM
|
33
|
+
YSVGl4kHZBGFTnkgJEnyjA0FGgDJcyJEzQJyTWzjO3TkX8pYvwyKk6ILLBHANAIQ
|
34
|
+
IHFo8pGEa/bYN7Si64eTfNtXBnEHQC1A15AvXtzjsBtqIA2e1gsWCbG83s2M9GoC
|
35
|
+
iHadjaBK6nUXzIZ2OCEldp3dzkozgJxu7tcb1Kmr9uAUFoot4w6yl+Kr4JZIW1ml
|
36
|
+
u3jdsCCQLM9dC78qib2fY8vh
|
37
|
+
-----END CERTIFICATE-----
|
38
|
+
date: 2019-11-15 00:00:00.000000000 Z
|
39
|
+
dependencies:
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: rxfhelper
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0.9'
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.9.4
|
50
|
+
type: :runtime
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - "~>"
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0.9'
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 0.9.4
|
60
|
+
description:
|
61
|
+
email: james@jamesrobertson.eu
|
62
|
+
executables: []
|
63
|
+
extensions: []
|
64
|
+
extra_rdoc_files: []
|
65
|
+
files:
|
66
|
+
- lib/qnd_html2page.rb
|
67
|
+
homepage: https://github.com/jrobertson/qnd_html2page
|
68
|
+
licenses:
|
69
|
+
- MIT
|
70
|
+
metadata: {}
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements: []
|
86
|
+
rubygems_version: 3.0.3
|
87
|
+
signing_key:
|
88
|
+
specification_version: 4
|
89
|
+
summary: Splits HTML into pages suitable for reading like a book.
|
90
|
+
test_files: []
|
metadata.gz.sig
ADDED
Binary file
|