qnd_html2page 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d2d282862036e389d6ea21c2d648487f79d8393a96e81c5f9422f36383df55fc
4
- data.tar.gz: b006d2a97965f4433974e741150819bb8f033b32c3cb525a6c587c54458157cc
3
+ metadata.gz: ffa5d9991d46892e27a79ec207484b95b61c846f7cc410c3ec3cf340556f06eb
4
+ data.tar.gz: 598d0458826f7c293d2d9a0cbdcd31d8e191e37e1bb04520d3bbd73ce5ba952a
5
5
  SHA512:
6
- metadata.gz: 175ce68c0af5f9fdc2d05b506d0beb8ae53ad696a752bb93ca640493b49f765d80a4fc555ea5ac72f6b76ca8107e973265965757802a34d0693dc68d61568bae
7
- data.tar.gz: ba16588a73a4f35ae1fd8fa175fcc32d1a86076ec9dc6c29e24adde118e94099640109d37eeeb7ff6f87db96dd3b4ff5f8f5cbadc4444383bf48adf66bdcb523
6
+ metadata.gz: 24c2d07d88d7276e57e24cb59ad0240f3ce4e29a0dfbd6ffcefa510502f2342c8d5ffb1045e15165add188d5659d7766415f6763225aecb426c913b68b7bd87e
7
+ data.tar.gz: ef2828615ea6d377efd93d704b5cbe6c3025a2bc3ba5574fa153571d34b527eab269e1250cba6cbafbd6557f40f86c9ba22c0b9801ae268b8c3501441dbc5b36
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -8,14 +8,31 @@ require 'tempfile'
8
8
  require 'rxfhelper'
9
9
 
10
10
 
11
+ module ArraySlices
12
+
13
+ refine Array do
14
+
15
+ def slice_at(*indices)
16
+
17
+ a = indices
18
+ a << -1 if a[-1] != self[-1]
19
+ a.unshift -1
20
+ a.each_cons(2).map {|x1, x2| self.slice(x1+1..x2) }
21
+
22
+ end
23
+
24
+ end
25
+ end
11
26
 
12
27
  class QndHtml2Page
28
+ using ArraySlices
29
+ using ColouredText
13
30
 
14
31
  attr_reader :to_pages
15
32
 
16
- def initialize(html, debug: false, pg_height: 676)
33
+ def initialize(html, debug: false, pg_height: 770, width: '700px')
17
34
 
18
- @html, @height, @debug = html, pg_height, debug
35
+ @html, @height, @width, @debug = html, pg_height, width, debug
19
36
  @to_pages = scan(RXFHelper.read(@html).first)
20
37
 
21
38
  end
@@ -28,11 +45,12 @@ class QndHtml2Page
28
45
 
29
46
  doc = Rexle.new(html)
30
47
  body = doc.root.element('body')
48
+ body.attributes[:style] = 'width: ' + @width
31
49
 
32
50
  count = 0
33
51
  body.each_recursive do |e|
34
52
 
35
- puts 'e: ' + e.name if @debug
53
+ puts ('e: ' + e.xml).debug if @debug
36
54
  ignore_list = %w(span b li tr td dt dd em strong i a)
37
55
  next if ignore_list.include? e.name
38
56
  span = Rexle::Element.new('span').add_text(count.to_s)
@@ -49,46 +67,51 @@ class QndHtml2Page
49
67
  File.write tmpfile.path + '.html', doc.root.xml
50
68
 
51
69
  browser = Ferrum::Browser.new
70
+ #browser.resize width: 300, height: 300
52
71
  browser.goto('file://' + tmpfile.path + '.html')
72
+ browser.screenshot(path: "/tmp/page.jpg")
53
73
  span_list = browser.xpath('//span')
54
74
  a = span_list.map {|x| [x.text, x.find_position.last] }
55
75
 
56
- offset = 0
76
+
77
+ heights = ((a.last.last) / @height).round.to_i.times.inject([@height]) {|r, x| r << (r.last + @height) }
78
+ puts ('heights: ' + heights.inspect).debug if @debug
79
+ height = heights.shift
57
80
 
58
81
  a2 = a.inject([[]]) do |r,x|
59
82
 
60
- puts 'r: ' + x.inspect if @debug
61
- puts 'x: ' + x.inspect if @debug
62
- x.last < offset + @height ? (r.last << x) : (offset = x.last; r << [x])
83
+ puts ('r: ' + x.inspect).debug if @debug
84
+ puts ('x: ' + x.inspect).debug if @debug
85
+ puts ('height: ' + height.inspect).debug if @debug
86
+
87
+ x.last < height ? (r.last << x) : (height = heights.shift; r << [x])
63
88
  r
64
89
 
65
90
  end
66
91
 
67
92
 
68
93
  elements = doc.root.element('body').elements.to_a
69
- puts 'elements.length: ' + elements.length.inspect if @debug
70
- offset = 0
71
-
72
- puts 'a2: ' + a2.inspect if @debug
73
-
74
- pages = a2.map do |x|
75
-
76
- id = x.last.first
77
-
78
- puts 'id: ' + id.inspect if @debug
79
- puts 'offset: ' + offset.inspect if @debug
80
-
81
- a3 = elements[offset..-1].take_while do |e|
82
- puts 'e.text: ' + e.text.inspect
83
- e.text != id
84
- end
94
+ puts ('elements.length: ' + elements.length.inspect).debug if @debug
95
+ offset2 = 0
96
+
97
+ puts ('a2: ' + a2.inspect).debug if @debug
98
+
99
+ # find each last record span stop using the given id
100
+ stops = a2.map do |x|
101
+ elements.index(elements.find {|e| e.text == x.last.first })
102
+ end
103
+
104
+ puts ('stops: ' + stops.inspect).debug if @debug
105
+
106
+ pages = elements.slice_at(*stops).map do |e_list|
85
107
 
86
- offset = a3.length
87
108
  div = Rexle::Element.new 'div'
88
- a3.reject! {|e| e.name == 'span' and e.attributes[:class] == 'qndhtml2pg' }
89
- a3.each {|e| div.add e}
109
+ e_list.reject! {|e| e.name == 'span' and e.attributes[:class] == 'qndhtml2pg' }
110
+ next if e_list.empty?
111
+ e_list.each {|e| div.add e}
112
+
90
113
  div
91
- end
114
+ end.compact
92
115
 
93
116
  @to_pages = pages
94
117
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: qnd_html2page
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  iHadjaBK6nUXzIZ2OCEldp3dzkozgJxu7tcb1Kmr9uAUFoot4w6yl+Kr4JZIW1ml
36
36
  u3jdsCCQLM9dC78qib2fY8vh
37
37
  -----END CERTIFICATE-----
38
- date: 2019-11-15 00:00:00.000000000 Z
38
+ date: 2019-11-16 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: rxfhelper
metadata.gz.sig CHANGED
@@ -1 +1,2 @@
1
- ��ZeT
1
+ pޛ<��P�1W?��t�7�\p��@��_�Ȓ�D�˟1���z�X��jq8(z։b�:�q S �{�aјCDžt��c�]��ۄ��`��,���w.?F-`ܮQ�4E
2
+ dCG�Yk�