qnd_html2page 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: cd082fe1f0ecffa42aa3f42ad0c8825aa71505d3c1dfb148cbfdc209c2871836
4
+ data.tar.gz: 507203ae52228dd19d1b68e554399c0fbfe4c4baf6fec969b46d515b28c1b937
5
+ SHA512:
6
+ metadata.gz: 0f16a5c52a7afee1a6a3ea478a7fda44b6aef7950b75ad96d60fea5a4c838b4e4c10c681af8e2714ea5907686bb6d6a611296d4c5840f02195e0c592e7ccd37a
7
+ data.tar.gz: af07a61b492b13f794d7e4d5572bed280876434da1c95de6a96dc0612828384b7d90fe2dfdf7ac8490e8cfadeffddb373ae0f8054a5fc60df1e9193f135c190f
checksums.yaml.gz.sig ADDED
Binary file
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: qnd_html2page.rb
4
+
5
+ require 'rexle'
6
+ require 'ferrum'
7
+ require 'tempfile'
8
+ require 'rxfhelper'
9
+
10
+
11
+
12
# Splits an HTML document into a list of page-sized <div> elements.
#
# A numbered marker <span> is inserted after (almost) every element of the
# document body, the document is rendered in a headless browser (Ferrum) to
# read back the y coordinate of each marker, and the top-level body elements
# are then grouped so that each group spans less than +pg_height+.
#
#   pages = QndHtml2Page.new(html, pg_height: 676).to_pages
#
class QndHtml2Page

  # Names of the elements that never get a marker span inserted after them.
  IGNORE_LIST = %w(span b li tr td dt dd em strong i a).freeze

  # Class attribute that identifies the marker spans injected by this class.
  MARKER_CLASS = 'qndhtml2pg'

  # @return [Array<Rexle::Element>] one <div> per page, in document order.
  attr_reader :to_pages

  # @param html      whatever RXFHelper.read accepts; the first item of its
  #                  result is parsed as the HTML document
  #                  (NOTE(review): presumably a string, file path or URL --
  #                  confirm against RXFHelper).
  # @param debug     [Boolean] when true, trace information is written to
  #                  stdout.
  # @param pg_height [Numeric] maximum vertical extent of a page, in the same
  #                  units as the y coordinate reported by the browser
  #                  (presumably pixels -- confirm against Ferrum).
  def initialize(html, debug: false, pg_height: 676)

    @html, @height, @debug = html, pg_height, debug
    @to_pages = scan(RXFHelper.read(@html).first)

  end

  private

  # Paginates the given HTML.
  #
  # NOTE(review): only the *top-level* children of <body> are distributed
  # over the pages, whereas markers are also placed after nested elements.
  # When the last marker of a page is a nested one, no top-level marker
  # matches and that page swallows every remaining element.
  #
  # @param html [String] the HTML document to split.
  # @return [Array<Rexle::Element>] one <div> per page; empty when no marker
  #   could be located.
  def scan(html)

    doc = Rexle.new(html)
    body = doc.root.element('body')
    insert_markers(body)

    groups = group_by_page(marker_positions(doc.root.xml))
    puts 'a2: ' + groups.inspect if @debug

    elements = body.elements.to_a
    puts 'elements.length: ' + elements.length.inspect if @debug

    build_pages(elements, groups)

  end

  # Adds a numbered marker span after every element in the body whose name
  # is not in IGNORE_LIST.
  def insert_markers(body)

    count = 0

    body.each_recursive do |e|

      puts 'e: ' + e.name if @debug
      next if IGNORE_LIST.include? e.name

      span = Rexle::Element.new('span').add_text(count.to_s)
      span.attributes[:class] = MARKER_CLASS
      e.insert_after span
      count += 1

    end

  end

  # Renders the document in a headless browser and returns a
  # [marker text, y coordinate] pair for every marker span.
  #
  # The temporary file carries the .html suffix itself (rather than writing
  # a sibling "<tmp>.html" file), so it is removed when the block exits, and
  # the browser process is always shut down.
  def marker_positions(xml)

    Tempfile.create(['browser', '.html']) do |file|

      file.write xml
      file.flush

      browser = Ferrum::Browser.new

      begin
        browser.goto('file://' + file.path)
        # Only the injected markers are of interest; spans that were already
        # part of the document carry no marker id.
        browser.xpath("//span[@class='#{MARKER_CLASS}']")
               .map { |x| [x.text, x.find_position.last] }
      ensure
        browser.quit
      end

    end

  end

  # Groups the [id, y] pairs into pages: a new page is started as soon as a
  # marker lies @height or more below the marker that started the current
  # page (the first page starts at y = 0).
  def group_by_page(marks)

    page_top = 0

    groups = marks.each_with_object([[]]) do |mark, acc|

      puts 'r: ' + acc.inspect if @debug
      puts 'x: ' + mark.inspect if @debug

      if mark.last < page_top + @height
        acc.last << mark
      else
        page_top = mark.last
        acc << [mark]
      end

    end

    # The seed group stays empty when there are no markers at all, or when
    # the very first marker already lies beyond one page height.
    groups.reject(&:empty?)

  end

  # Builds one <div> per group, holding the top-level elements found before
  # the group's last marker.
  def build_pages(elements, groups)

    offset = 0

    groups.map do |group|

      id = group.last.first

      puts 'id: ' + id.inspect if @debug
      puts 'offset: ' + offset.inspect if @debug

      chunk = elements.drop(offset).take_while do |e|
        puts 'e.text: ' + e.text.inspect if @debug
        # Stop at the marker itself, not at any element that merely happens
        # to have the same text as the marker id.
        !(marker?(e) && e.text == id)
      end

      # Advance (not reset) the cursor. It now rests on the closing marker,
      # which is filtered out of the next page below.
      offset += chunk.length

      div = Rexle::Element.new 'div'
      chunk.reject { |e| marker?(e) }.each { |e| div.add e }
      div

    end

  end

  # True when the element is one of the marker spans added by #insert_markers.
  def marker?(e)
    e.name == 'span' && e.attributes[:class] == MARKER_CLASS
  end

end
data.tar.gz.sig ADDED
Binary file
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: qnd_html2page
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMTkxMTE1MTAzMzE4WhcN
15
+ MjAxMTE0MTAzMzE4WjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQDja3r/
17
+ qSzQNOzRg4qp9obwEq37YTr0gCLF5QCiAyjhfBqxv/OBPg7b+06PyOPShMkIbLYy
18
+ gnRhgzvDvDf2kpS/9ziE2bmOCp8MimWS0I0aT77qbEho6knw5BfEsPEZfG5oVtmy
19
+ au5iPivNst1sOKMTw6v2OMeiXx0a5fxZJtMnKr8/w46HKB3bpZ9m2+JDnx7cZ+Fh
20
+ RF0XTuQz79uHv1T+4oTU/DrBRqqlIASDVPmeyfOlkwTPQcPE875LclbDoBWz5vMT
21
+ kkTETVHbgB+D6i6mV9Sp896/noPP8JH84D+NvqgUppbRdUTiT+WnCFTV4ooJkACq
22
+ WC5twvtJuuvMOv7WsfsTqdZLJOm1g19JU9V7xYNxFbHQNHoLNomRKZ+NYjESkhVV
23
+ U4w4ILlvPHLfEPgeZlVGSa835XWabBwNVLXLUW/ZmlEQTz+u8isbtAk7kKbWaPUF
24
+ mEHhDcc2UG6l7uGe8E/jjOI8UufkuDFr/06QTf/azeu4AHWzGCG22MdhX3ECAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUNwHbSmuk
26
+ gigzYYMStlUkMnT8CzcwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEADCXJFuzQ8G2QrTRxJUJiWZq8ayjfFHfhr5cqvxUy
29
+ ZZ6CsT02ffAU6W7116mIBTPLD1tS5ZAws8fJ3gs+vSLJAYTteUX2ykyJLlF+cjZz
30
+ i1T2z6R6VWPNAm4wcAdY96f3Dgq/2g3lq8xdwf26XITYFKGiqMnE9IGJ+J6V7+vr
31
+ szMrxeV30iUhoeSNuM57pdDIIMWbSLSavUeoqcuTYVa9zSqz5LBdk85ibf637NbT
32
+ A0dlTm3pbiomqQIVGoRr/Gj1Dz2/dHzndXS3k8hg4iIvrMLRymIghh7AZrJcMnpM
33
+ YSVGl4kHZBGFTnkgJEnyjA0FGgDJcyJEzQJyTWzjO3TkX8pYvwyKk6ILLBHANAIQ
34
+ IHFo8pGEa/bYN7Si64eTfNtXBnEHQC1A15AvXtzjsBtqIA2e1gsWCbG83s2M9GoC
35
+ iHadjaBK6nUXzIZ2OCEldp3dzkozgJxu7tcb1Kmr9uAUFoot4w6yl+Kr4JZIW1ml
36
+ u3jdsCCQLM9dC78qib2fY8vh
37
+ -----END CERTIFICATE-----
38
+ date: 2019-11-15 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: rxfhelper
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '0.9'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 0.9.4
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '0.9'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 0.9.4
60
+ description:
61
+ email: james@jamesrobertson.eu
62
+ executables: []
63
+ extensions: []
64
+ extra_rdoc_files: []
65
+ files:
66
+ - lib/qnd_html2page.rb
67
+ homepage: https://github.com/jrobertson/qnd_html2page
68
+ licenses:
69
+ - MIT
70
+ metadata: {}
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubygems_version: 3.0.3
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: Splits HTML into pages suitable for reading like a book.
90
+ test_files: []
metadata.gz.sig ADDED
Binary file