xml_row_finder 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e993dbcc7aa78609f6da7dad8044858a117ad8f2a35dbaffb8a4bef967d1c2fa
4
- data.tar.gz: c73231bd44d7845a132c740ef02c48afc187459470e877c63372f6f82f99b03d
3
+ metadata.gz: 29c7aa56863bd3e1d4d7cc5bcc35c05a3c447fa6e3529b7cef763dbe2ca14091
4
+ data.tar.gz: 4113bc3c0d63433df71e5e04720c084a457a698eb432544c0043bc5ee4aa09f1
5
5
  SHA512:
6
- metadata.gz: 996c4a63be86e7abe8948ef5dd6860f2b9226e1e429ef0f51c7ece7efb4b54cb35b18b48488403264149183b0b61d3f7d05eecc52d1dd9841f9ab0c654ffc2c6
7
- data.tar.gz: fe7558dbaf9a183e860b8214bac3dfc0ca797364775c3b49ddded4c24acea51a8c600e0d3e1dc53ac74f6557dd72eb4de5284212be15bfe3df0fba7d8ca86d89
6
+ metadata.gz: b1424142187bb87172c1c772f54a4693c36b8f903fc20781275ea5c70003cff388472c33eb68815ea2ce1d2f70e0b0b9ba84db1caa04032615b8fc7a1a74a259
7
+ data.tar.gz: d34b7cf995f00cadf57c4ecec858d56540cf43ca7234b1db4937b4ac83cea440f32d6ed38479983476e37ad11e00a9d6242bae33159586981d77174992d42698
checksums.yaml.gz.sig CHANGED
@@ -1,6 +1,3 @@
1
- �u�3hBju���4/+H��wt��:��H!����q�!9v��@/Ad䳹na$�]x�vw���.�r��X �5��d����2���� ���ٽRJ�O�O�:�k\��{􊒔�1��T��;�� 1�
2
- V��=��Ų�
3
- I&�3�s���Q�.V")��������&c��E+��i�͢Ԧa��eyK�ո�D~}r�PM�ߞw��h��� u�{^�R?%�ۮi����
4
- +I�����6`��h>�����y6l+��@�Iq=�/"���v�׵���� HN�>���s4G3�/�H
5
- �2�� �?նͰ;��8O�W�߯�M�����E��
6
- �~���7��
1
+ n��v,soF6D:�K��A���ƈiK3S���`5��hIᇔ����Po��
2
+ \f���@���s�*���j"��w7}
3
+ ���!�1uj��j^�
@@ -2,29 +2,44 @@
2
2
 
3
3
  # file: xml_row_finder.rb
4
4
 
5
- require 'rexle'
6
- require 'rexml'
7
- include REXML
5
+ require 'nokorexi'
8
6
 
9
7
 
10
8
  class XMLRowFinder
11
9
 
12
10
  attr_reader :to_a
13
11
 
14
- def initialize(s, debug: false)
12
+ def initialize(raws, debug: false)
15
13
 
16
14
  @debug = debug
17
- doc = Rexle.new(s)
18
- @doc2 = Document.new(s)
15
+
16
+ @doc = if raws =~ /^http/ then
17
+
18
+ nki = Nokorexi.new(url=raws) do |doc1|
19
+ doc1.xpath('//*[@onclick]').each do |e|
20
+ e.attributes['onclick'].value = ''
21
+ end
22
+
23
+ doc1.xpath('//*[@onmousedown]').each do |e|
24
+ e.attributes['onmousedown'].value = ''
25
+ end
26
+
27
+ end
28
+
29
+ nki.to_doc
30
+
31
+ else
32
+ Rexle.new(raws)
33
+ end
19
34
 
20
35
  a = []
21
36
 
22
- doc.root.each_recursive do |e|
37
+ @doc.root.each_recursive do |e|
23
38
  e.attributes.delete
24
39
  a << e.backtrack.to_xpath
25
40
  end
26
41
 
27
- @to_a = a2 = a.map {|e| [a.count(e), e] }
42
+ @to_a = a2 = a.map {|e| [a.count(e), e] }.uniq
28
43
  xpath = a2.max_by(&:first).last
29
44
 
30
45
  a3 = xpath.split('/')
@@ -35,20 +50,60 @@ class XMLRowFinder
35
50
  p1 << a3.pop; a4 << a3.join('/') + "[%s]" % p1.reverse.join('/')
36
51
  end
37
52
 
53
+ # using Nokogiri since Rexle has a bug with xpath predicates
54
+ #
55
+ @doc2 = Nokogiri::XML(@doc.root.xml)
56
+
38
57
  a5 = a4[0..-2].map do |xpath2|
39
- [XPath.match(@doc2, xpath2).length, xpath2]
58
+ [@doc2.xpath(xpath2).length, xpath2]
40
59
  end
41
60
 
42
61
  @xpath = a5.reverse.detect {|num, xpath2| num > 1}.last
43
62
 
63
+ last_row = @doc2.xpath(@xpath).last
64
+
65
+ # find the container element
66
+ xpath = @xpath[/^[^\[]+/]
67
+ axpath = xpath.split('/')
68
+ e = @doc.element xpath
69
+
70
+ until (e.xml.include? last_row) do
71
+ axpath.pop
72
+ e = @doc.element axpath.join('/')
73
+ end
74
+
75
+ @cont_xpath = axpath.join('/')
76
+
77
+ end
78
+
79
+ # returns the container element for all rows
80
+ # object returned: Rexle::Element
81
+ #
82
+ def body()
83
+ @doc.element @cont_xpath
44
84
  end
45
85
 
86
+ # returns the xpath pointing to the container element for all rows
87
+ #
88
+ def body_xpath()
89
+ @cont_xpath
90
+ end
91
+
92
+
93
+ # returns rows
94
+ # object returned: An array of Nokogiri XML Element object
95
+ #
46
96
  def rows()
47
- XPath.match(@doc2, @xpath)
97
+ @doc2.xpath @xpath
48
98
  end
49
99
 
100
+ # returns the xpath pointing to the rows
101
+ #
50
102
  def to_xpath()
51
103
  @xpath
52
104
  end
53
105
 
106
+ alias rows_xpath to_xpath
107
+
108
+
54
109
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_row_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -38,25 +38,25 @@ cert_chain:
38
38
  date: 2022-01-24 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
- name: rexle
41
+ name: nokorexi
42
42
  requirement: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.5'
46
+ version: '0.5'
47
47
  - - ">="
48
48
  - !ruby/object:Gem::Version
49
- version: 1.5.14
49
+ version: 0.5.4
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - "~>"
55
55
  - !ruby/object:Gem::Version
56
- version: '1.5'
56
+ version: '0.5'
57
57
  - - ">="
58
58
  - !ruby/object:Gem::Version
59
- version: 1.5.14
59
+ version: 0.5.4
60
60
  description:
61
61
  email: digital.robertson@gmail.com
62
62
  executables: []
metadata.gz.sig CHANGED
Binary file