qnd_html2page 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d2d282862036e389d6ea21c2d648487f79d8393a96e81c5f9422f36383df55fc
4
- data.tar.gz: b006d2a97965f4433974e741150819bb8f033b32c3cb525a6c587c54458157cc
3
+ metadata.gz: ffa5d9991d46892e27a79ec207484b95b61c846f7cc410c3ec3cf340556f06eb
4
+ data.tar.gz: 598d0458826f7c293d2d9a0cbdcd31d8e191e37e1bb04520d3bbd73ce5ba952a
5
5
  SHA512:
6
- metadata.gz: 175ce68c0af5f9fdc2d05b506d0beb8ae53ad696a752bb93ca640493b49f765d80a4fc555ea5ac72f6b76ca8107e973265965757802a34d0693dc68d61568bae
7
- data.tar.gz: ba16588a73a4f35ae1fd8fa175fcc32d1a86076ec9dc6c29e24adde118e94099640109d37eeeb7ff6f87db96dd3b4ff5f8f5cbadc4444383bf48adf66bdcb523
6
+ metadata.gz: 24c2d07d88d7276e57e24cb59ad0240f3ce4e29a0dfbd6ffcefa510502f2342c8d5ffb1045e15165add188d5659d7766415f6763225aecb426c913b68b7bd87e
7
+ data.tar.gz: ef2828615ea6d377efd93d704b5cbe6c3025a2bc3ba5574fa153571d34b527eab269e1250cba6cbafbd6557f40f86c9ba22c0b9801ae268b8c3501441dbc5b36
Binary file
data.tar.gz.sig CHANGED
Binary file
@@ -8,14 +8,31 @@ require 'tempfile'
8
8
  require 'rxfhelper'
9
9
 
10
10
 
11
# Refinement that adds Array#slice_at, which cuts an array into consecutive
# sub-arrays at caller-supplied end positions. Activate it with
# `using ArraySlices` in the scope that needs it.
module ArraySlices

  refine Array do

    # Splits the receiver into consecutive slices.
    #
    # Each value in +indices+ is the (inclusive) index of the last item of
    # a slice; the next slice starts at the following index. Any items left
    # after the final index are returned as one extra trailing slice.
    #
    #   [1, 2, 3, 4, 5].slice_at(1, 3)  #=> [[1, 2], [3, 4], [5]]
    #   [10, 20, 30].slice_at(2)        #=> [[10, 20, 30]]
    #
    # @param indices [Array<Integer>] inclusive end index of each slice.
    #   NOTE(review): indices are assumed to be ascending, in-range
    #   integers; they are not validated here.
    # @return [Array<Array>] the slices in order; [] for an empty receiver.
    def slice_at(*indices)

      return [] if empty?

      stops = indices.dup

      # Add a closing stop only when the caller's last cut does not already
      # land on the final item. The comparison is against the last *index*
      # (length - 1), not the last element, so a trailing remainder is never
      # dropped and no empty trailing slice is produced.
      stops << -1 unless stops.last == length - 1

      # A leading -1 makes the first slice start at index 0 (-1 + 1).
      stops.unshift(-1)

      stops.each_cons(2).map { |from, to| slice((from + 1)..to) }

    end

  end
end
11
26
 
12
27
  class QndHtml2Page
28
+ using ArraySlices
29
+ using ColouredText
13
30
 
14
31
  attr_reader :to_pages
15
32
 
16
- def initialize(html, debug: false, pg_height: 676)
33
+ def initialize(html, debug: false, pg_height: 770, width: '700px')
17
34
 
18
- @html, @height, @debug = html, pg_height, debug
35
+ @html, @height, @width, @debug = html, pg_height, width, debug
19
36
  @to_pages = scan(RXFHelper.read(@html).first)
20
37
 
21
38
  end
@@ -28,11 +45,12 @@ class QndHtml2Page
28
45
 
29
46
  doc = Rexle.new(html)
30
47
  body = doc.root.element('body')
48
+ body.attributes[:style] = 'width: ' + @width
31
49
 
32
50
  count = 0
33
51
  body.each_recursive do |e|
34
52
 
35
- puts 'e: ' + e.name if @debug
53
+ puts ('e: ' + e.xml).debug if @debug
36
54
  ignore_list = %w(span b li tr td dt dd em strong i a)
37
55
  next if ignore_list.include? e.name
38
56
  span = Rexle::Element.new('span').add_text(count.to_s)
@@ -49,46 +67,51 @@ class QndHtml2Page
49
67
  File.write tmpfile.path + '.html', doc.root.xml
50
68
 
51
69
  browser = Ferrum::Browser.new
70
+ #browser.resize width: 300, height: 300
52
71
  browser.goto('file://' + tmpfile.path + '.html')
72
+ browser.screenshot(path: "/tmp/page.jpg")
53
73
  span_list = browser.xpath('//span')
54
74
  a = span_list.map {|x| [x.text, x.find_position.last] }
55
75
 
56
- offset = 0
76
+
77
+ heights = ((a.last.last) / @height).round.to_i.times.inject([@height]) {|r, x| r << (r.last + @height) }
78
+ puts ('heights: ' + heights.inspect).debug if @debug
79
+ height = heights.shift
57
80
 
58
81
  a2 = a.inject([[]]) do |r,x|
59
82
 
60
- puts 'r: ' + x.inspect if @debug
61
- puts 'x: ' + x.inspect if @debug
62
- x.last < offset + @height ? (r.last << x) : (offset = x.last; r << [x])
83
+ puts ('r: ' + x.inspect).debug if @debug
84
+ puts ('x: ' + x.inspect).debug if @debug
85
+ puts ('height: ' + height.inspect).debug if @debug
86
+
87
+ x.last < height ? (r.last << x) : (height = heights.shift; r << [x])
63
88
  r
64
89
 
65
90
  end
66
91
 
67
92
 
68
93
  elements = doc.root.element('body').elements.to_a
69
- puts 'elements.length: ' + elements.length.inspect if @debug
70
- offset = 0
71
-
72
- puts 'a2: ' + a2.inspect if @debug
73
-
74
- pages = a2.map do |x|
75
-
76
- id = x.last.first
77
-
78
- puts 'id: ' + id.inspect if @debug
79
- puts 'offset: ' + offset.inspect if @debug
80
-
81
- a3 = elements[offset..-1].take_while do |e|
82
- puts 'e.text: ' + e.text.inspect
83
- e.text != id
84
- end
94
+ puts ('elements.length: ' + elements.length.inspect).debug if @debug
95
+ offset2 = 0
96
+
97
+ puts ('a2: ' + a2.inspect).debug if @debug
98
+
99
+ # find each last record span stop using the given id
100
+ stops = a2.map do |x|
101
+ elements.index(elements.find {|e| e.text == x.last.first })
102
+ end
103
+
104
+ puts ('stops: ' + stops.inspect).debug if @debug
105
+
106
+ pages = elements.slice_at(*stops).map do |e_list|
85
107
 
86
- offset = a3.length
87
108
  div = Rexle::Element.new 'div'
88
- a3.reject! {|e| e.name == 'span' and e.attributes[:class] == 'qndhtml2pg' }
89
- a3.each {|e| div.add e}
109
+ e_list.reject! {|e| e.name == 'span' and e.attributes[:class] == 'qndhtml2pg' }
110
+ next if e_list.empty?
111
+ e_list.each {|e| div.add e}
112
+
90
113
  div
91
- end
114
+ end.compact
92
115
 
93
116
  @to_pages = pages
94
117
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: qnd_html2page
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  iHadjaBK6nUXzIZ2OCEldp3dzkozgJxu7tcb1Kmr9uAUFoot4w6yl+Kr4JZIW1ml
36
36
  u3jdsCCQLM9dC78qib2fY8vh
37
37
  -----END CERTIFICATE-----
38
- date: 2019-11-15 00:00:00.000000000 Z
38
+ date: 2019-11-16 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: rxfhelper
metadata.gz.sig CHANGED
@@ -1 +1,2 @@
1
- ��ZeT
1
+ pޛ<��P�1W?��t�7�\p��@��_�Ȓ�D�˟1���z�X��jq8(z։b�:�q S �{�aјCDžt��c�]��ۄ��`��,���w.?F-`ܮQ�4E
2
+ dCG�Yk�