xml_row_finder 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e993dbcc7aa78609f6da7dad8044858a117ad8f2a35dbaffb8a4bef967d1c2fa
4
- data.tar.gz: c73231bd44d7845a132c740ef02c48afc187459470e877c63372f6f82f99b03d
3
+ metadata.gz: 29c7aa56863bd3e1d4d7cc5bcc35c05a3c447fa6e3529b7cef763dbe2ca14091
4
+ data.tar.gz: 4113bc3c0d63433df71e5e04720c084a457a698eb432544c0043bc5ee4aa09f1
5
5
  SHA512:
6
- metadata.gz: 996c4a63be86e7abe8948ef5dd6860f2b9226e1e429ef0f51c7ece7efb4b54cb35b18b48488403264149183b0b61d3f7d05eecc52d1dd9841f9ab0c654ffc2c6
7
- data.tar.gz: fe7558dbaf9a183e860b8214bac3dfc0ca797364775c3b49ddded4c24acea51a8c600e0d3e1dc53ac74f6557dd72eb4de5284212be15bfe3df0fba7d8ca86d89
6
+ metadata.gz: b1424142187bb87172c1c772f54a4693c36b8f903fc20781275ea5c70003cff388472c33eb68815ea2ce1d2f70e0b0b9ba84db1caa04032615b8fc7a1a74a259
7
+ data.tar.gz: d34b7cf995f00cadf57c4ecec858d56540cf43ca7234b1db4937b4ac83cea440f32d6ed38479983476e37ad11e00a9d6242bae33159586981d77174992d42698
checksums.yaml.gz.sig CHANGED
@@ -1,6 +1,3 @@
1
- �u�3hBju���4/+H��wt��:��H!����q�!9v��@/Ad䳹na$�]x�vw���.�r��X �5��d����2���� ���ٽRJ�O�O�:�k\��{􊒔�1��T��;�� 1�
2
- V��=���
3
- I&�3�s���Q�.V")��������&c��E+��i�͢Ԧa��eyK�ո�D~}r�PM�ߞw��h��� u�{^�R?%�ۮi����
4
- +I�����6`��h>�����y6l+��@�Iq=�/"���v�׵���� HN�>���s4G3�/�H
5
- �2�� �?նͰ;��8O�W�߯�M�����E��
6
- �~���7��
1
+ n��v,soF6D:�K��A���ƈiK3S���`5��hIᇔ����Po��
2
+ \f���@���s�*���j"��w7}
3
+ ���!�1uj��j^�
@@ -2,29 +2,44 @@
2
2
 
3
3
  # file: xml_row_finder.rb
4
4
 
5
- require 'rexle'
6
- require 'rexml'
7
- include REXML
5
+ require 'nokorexi'
8
6
 
9
7
 
10
8
  class XMLRowFinder
11
9
 
12
10
  attr_reader :to_a
13
11
 
14
- def initialize(s, debug: false)
12
+ def initialize(raws, debug: false)
15
13
 
16
14
  @debug = debug
17
- doc = Rexle.new(s)
18
- @doc2 = Document.new(s)
15
+
16
+ @doc = if raws =~ /^http/ then
17
+
18
+ nki = Nokorexi.new(url=raws) do |doc1|
19
+ doc1.xpath('//*[@onclick]').each do |e|
20
+ e.attributes['onclick'].value = ''
21
+ end
22
+
23
+ doc1.xpath('//*[@onmousedown]').each do |e|
24
+ e.attributes['onmousedown'].value = ''
25
+ end
26
+
27
+ end
28
+
29
+ nki.to_doc
30
+
31
+ else
32
+ Rexle.new(raws)
33
+ end
19
34
 
20
35
  a = []
21
36
 
22
- doc.root.each_recursive do |e|
37
+ @doc.root.each_recursive do |e|
23
38
  e.attributes.delete
24
39
  a << e.backtrack.to_xpath
25
40
  end
26
41
 
27
- @to_a = a2 = a.map {|e| [a.count(e), e] }
42
+ @to_a = a2 = a.map {|e| [a.count(e), e] }.uniq
28
43
  xpath = a2.max_by(&:first).last
29
44
 
30
45
  a3 = xpath.split('/')
@@ -35,20 +50,60 @@ class XMLRowFinder
35
50
  p1 << a3.pop; a4 << a3.join('/') + "[%s]" % p1.reverse.join('/')
36
51
  end
37
52
 
53
+ # using Nokogiri since Rexle has a bug with xpath predicates
54
+ #
55
+ @doc2 = Nokogiri::XML(@doc.root.xml)
56
+
38
57
  a5 = a4[0..-2].map do |xpath2|
39
- [XPath.match(@doc2, xpath2).length, xpath2]
58
+ [@doc2.xpath(xpath2).length, xpath2]
40
59
  end
41
60
 
42
61
  @xpath = a5.reverse.detect {|num, xpath2| num > 1}.last
43
62
 
63
+ last_row = @doc2.xpath(@xpath).last
64
+
65
+ # find the container element
66
+ xpath = @xpath[/^[^\[]+/]
67
+ axpath = xpath.split('/')
68
+ e = @doc.element xpath
69
+
70
+ until (e.xml.include? last_row) do
71
+ axpath.pop
72
+ e = @doc.element axpath.join('/')
73
+ end
74
+
75
+ @cont_xpath = axpath.join('/')
76
+
77
+ end
78
+
79
+ # returns the container element for all rows
80
+ # object returned: Rexle::Element
81
+ #
82
+ def body()
83
+ @doc.element @cont_xpath
44
84
  end
45
85
 
86
+ # returns the xpath pointing to the container element for all rows
87
+ #
88
+ def body_xpath()
89
+ @cont_xpath
90
+ end
91
+
92
+
93
+ # returns rows
94
+ # object returned: a Nokogiri::XML::NodeSet (array-like) of the matching row elements
95
+ #
46
96
  def rows()
47
- XPath.match(@doc2, @xpath)
97
+ @doc2.xpath @xpath
48
98
  end
49
99
 
100
+ # returns the xpath pointing to the rows
101
+ #
50
102
  def to_xpath()
51
103
  @xpath
52
104
  end
53
105
 
106
+ alias rows_xpath to_xpath
107
+
108
+
54
109
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_row_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -38,25 +38,25 @@ cert_chain:
38
38
  date: 2022-01-24 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: rexle
41
+ name: nokorexi
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.5'
46
+ version: '0.5'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.5.14
49
+ version: 0.5.4
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '1.5'
56
+ version: '0.5'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.5.14
59
+ version: 0.5.4
60
60
  description:
61
61
  email: digital.robertson@gmail.com
62
62
  executables: []
metadata.gz.sig CHANGED
Binary file