qnd_html2page 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: cd082fe1f0ecffa42aa3f42ad0c8825aa71505d3c1dfb148cbfdc209c2871836
4
+ data.tar.gz: 507203ae52228dd19d1b68e554399c0fbfe4c4baf6fec969b46d515b28c1b937
5
+ SHA512:
6
+ metadata.gz: 0f16a5c52a7afee1a6a3ea478a7fda44b6aef7950b75ad96d60fea5a4c838b4e4c10c681af8e2714ea5907686bb6d6a611296d4c5840f02195e0c592e7ccd37a
7
+ data.tar.gz: af07a61b492b13f794d7e4d5572bed280876434da1c95de6a96dc0612828384b7d90fe2dfdf7ac8490e8cfadeffddb373ae0f8054a5fc60df1e9193f135c190f
checksums.yaml.gz.sig ADDED
Binary file
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: qnd_html2page.rb
4
+
5
+ require 'rexle'
6
+ require 'ferrum'
7
+ require 'tempfile'
8
+ require 'rxfhelper'
9
+
10
+
11
+
12
# Splits an HTML document into a list of page-sized <div> elements.
#
# A numbered marker <span> is inserted after each element in the body, the
# document is rendered in a headless browser (Ferrum) to read each marker's
# y coordinate, and the top-level body elements are then grouped into pages
# whose markers fit within +pg_height+.
class QndHtml2Page

  # Class attribute given to every marker span inserted by this class.
  MARKER_CLASS = 'qndhtml2pg'

  # Elements which never receive a marker span of their own.
  IGNORED_TAGS = %w[span b li tr td dt dd em strong i a].freeze

  # @return [Array] one Rexle 'div' element per page
  attr_reader :to_pages

  # @param html      passed straight to RXFHelper.read; the first item of
  #                  its result is used as the HTML source
  # @param debug     [Boolean] when true, progress is printed to stdout
  # @param pg_height [Numeric] maximum page height, in the units reported
  #                  by the browser (presumably CSS pixels - TODO confirm)
  def initialize(html, debug: false, pg_height: 676)

    @html, @height, @debug = html, pg_height, debug
    @to_pages = scan(RXFHelper.read(@html).first)

  end

  private

  # Runs the whole pipeline: insert markers, measure them, group them into
  # pages and wrap each page's elements in a div.
  def scan(html)

    doc = Rexle.new(html)
    insert_markers doc.root.element('body')

    groups = group_by_page(measure_markers(doc))
    puts 'groups: ' + groups.inspect if @debug

    elements = doc.root.element('body').elements.to_a
    puts 'elements.length: ' + elements.length.inspect if @debug

    partition_elements(elements, groups).map do |page_elements|
      div = Rexle::Element.new 'div'
      page_elements.each { |e| div.add e }
      div
    end

  end

  # Adds a numbered marker span after every element in the body, skipping
  # the tag names listed in IGNORED_TAGS.
  def insert_markers(body)

    count = 0

    body.each_recursive do |e|

      puts 'e: ' + e.name if @debug
      next if IGNORED_TAGS.include? e.name

      span = Rexle::Element.new('span').add_text(count.to_s)
      span.attributes[:class] = MARKER_CLASS
      e.insert_after span
      count += 1

    end

  end

  # Renders the document in a headless browser and returns a
  # [marker text, y coordinate] pair for every marker span.
  def measure_markers(doc)

    # The '.html' suffix is part of the temp file's own name, so removing
    # the Tempfile also removes the file the browser loaded.
    tmpfile = Tempfile.new(['browser', '.html'])
    tmpfile.write doc.root.xml
    tmpfile.close

    browser = Ferrum::Browser.new

    begin
      browser.goto('file://' + tmpfile.path)
      # Only the spans inserted by #insert_markers are wanted; spans which
      # were already in the document carry no page-break information.
      spans = browser.xpath("//span[@class='#{MARKER_CLASS}']")
      spans.map { |node| [node.text, node.find_position.last] }
    ensure
      browser.quit   # otherwise the browser process is left running
    end

  ensure
    tmpfile&.unlink
  end

  # Groups [marker text, y] pairs into pages. A new page is started when a
  # marker's y is at or beyond the current page's starting y plus @height.
  #
  # @param markers [Array<Array>] [marker text, y] pairs in document order
  # @return [Array<Array<Array>>] non-empty groups of pairs, one per page
  def group_by_page(markers)

    page_top = 0

    groups = markers.each_with_object([[]]) do |marker, pages|

      puts 'pages: ' + pages.inspect if @debug
      puts 'marker: ' + marker.inspect if @debug

      if marker.last < page_top + @height
        pages.last << marker
      else
        page_top = marker.last
        pages << [marker]
      end

    end

    # The seed group stays empty when there are no markers at all, or when
    # the very first marker already lies beyond the first page.
    groups.reject(&:empty?)

  end

  # Splits the top-level body elements into one array per page, dropping
  # the marker spans themselves.
  #
  # @param elements [Array] top-level body elements, markers included
  # @param groups   [Array] result of #group_by_page
  # @return [Array<Array>] the elements belonging to each page
  def partition_elements(elements, groups)

    offset = 0

    groups.map do |group|

      id = group.last.first   # text of the last marker on this page

      puts 'id: ' + id.inspect if @debug
      puts 'offset: ' + offset.inspect if @debug

      slice = elements.drop(offset).take_while do |e|
        puts 'e.text: ' + e.text.inspect if @debug
        e.text != id
      end

      # Advance the running index; it must accumulate across pages so that
      # each page resumes where the previous one stopped.
      offset += slice.length

      slice.reject do |e|
        e.name == 'span' && e.attributes[:class] == MARKER_CLASS
      end

    end

  end

end
data.tar.gz.sig ADDED
Binary file
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: qnd_html2page
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkxMTE1MTAzMzE4WhcN
15
+ MjAxMTE0MTAzMzE4WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDja3r/
17
+ qSzQNOzRg4qp9obwEq37YTr0gCLF5QCiAyjhfBqxv/OBPg7b+06PyOPShMkIbLYy
18
+ gnRhgzvDvDf2kpS/9ziE2bmOCp8MimWS0I0aT77qbEho6knw5BfEsPEZfG5oVtmy
19
+ au5iPivNst1sOKMTw6v2OMeiXx0a5fxZJtMnKr8/w46HKB3bpZ9m2+JDnx7cZ+Fh
20
+ RF0XTuQz79uHv1T+4oTU/DrBRqqlIASDVPmeyfOlkwTPQcPE875LclbDoBWz5vMT
21
+ kkTETVHbgB+D6i6mV9Sp896/noPP8JH84D+NvqgUppbRdUTiT+WnCFTV4ooJkACq
22
+ WC5twvtJuuvMOv7WsfsTqdZLJOm1g19JU9V7xYNxFbHQNHoLNomRKZ+NYjESkhVV
23
+ U4w4ILlvPHLfEPgeZlVGSa835XWabBwNVLXLUW/ZmlEQTz+u8isbtAk7kKbWaPUF
24
+ mEHhDcc2UG6l7uGe8E/jjOI8UufkuDFr/06QTf/azeu4AHWzGCG22MdhX3ECAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUNwHbSmuk
26
+ gigzYYMStlUkMnT8CzcwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEADCXJFuzQ8G2QrTRxJUJiWZq8ayjfFHfhr5cqvxUy
29
+ ZZ6CsT02ffAU6W7116mIBTPLD1tS5ZAws8fJ3gs+vSLJAYTteUX2ykyJLlF+cjZz
30
+ i1T2z6R6VWPNAm4wcAdY96f3Dgq/2g3lq8xdwf26XITYFKGiqMnE9IGJ+J6V7+vr
31
+ szMrxeV30iUhoeSNuM57pdDIIMWbSLSavUeoqcuTYVa9zSqz5LBdk85ibf637NbT
32
+ A0dlTm3pbiomqQIVGoRr/Gj1Dz2/dHzndXS3k8hg4iIvrMLRymIghh7AZrJcMnpM
33
+ YSVGl4kHZBGFTnkgJEnyjA0FGgDJcyJEzQJyTWzjO3TkX8pYvwyKk6ILLBHANAIQ
34
+ IHFo8pGEa/bYN7Si64eTfNtXBnEHQC1A15AvXtzjsBtqIA2e1gsWCbG83s2M9GoC
35
+ iHadjaBK6nUXzIZ2OCEldp3dzkozgJxu7tcb1Kmr9uAUFoot4w6yl+Kr4JZIW1ml
36
+ u3jdsCCQLM9dC78qib2fY8vh
37
+ -----END CERTIFICATE-----
38
+ date: 2019-11-15 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: rxfhelper
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.9'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 0.9.4
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '0.9'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 0.9.4
60
+ description:
61
+ email: james@jamesrobertson.eu
62
+ executables: []
63
+ extensions: []
64
+ extra_rdoc_files: []
65
+ files:
66
+ - lib/qnd_html2page.rb
67
+ homepage: https://github.com/jrobertson/qnd_html2page
68
+ licenses:
69
+ - MIT
70
+ metadata: {}
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubygems_version: 3.0.3
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: Splits HTML into pages suitable for reading like a book.
90
+ test_files: []
metadata.gz.sig ADDED
Binary file