xml_row_finder 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfc152e03ec51b624550b4b4bf031eae3e7205c42e1dccbdd4d45219d684860d
4
- data.tar.gz: 77be8ae4da89aded4ec73a653e9fe1e3d9815a144a29a37a73158b97174da984
3
+ metadata.gz: e993dbcc7aa78609f6da7dad8044858a117ad8f2a35dbaffb8a4bef967d1c2fa
4
+ data.tar.gz: c73231bd44d7845a132c740ef02c48afc187459470e877c63372f6f82f99b03d
5
5
  SHA512:
6
- metadata.gz: e83c208f4dfeac7827b9f8ca453baba4f19ff4be9d4341bbd15f46695948afd7af42ae5ad0c71efb994b58832c46b9cf836fcb2d115ffb126a49ebfc26065772
7
- data.tar.gz: 24335ada2416257aa9caebd29b687562e92fe2d0ca2ac71f59824448c98eee018e15f95ee60352ed326440a50cb5445d3fb5bd4cbb1a0c47eed58ffc85ff12ec
6
+ metadata.gz: 996c4a63be86e7abe8948ef5dd6860f2b9226e1e429ef0f51c7ece7efb4b54cb35b18b48488403264149183b0b61d3f7d05eecc52d1dd9841f9ab0c654ffc2c6
7
+ data.tar.gz: fe7558dbaf9a183e860b8214bac3dfc0ca797364775c3b49ddded4c24acea51a8c600e0d3e1dc53ac74f6557dd72eb4de5284212be15bfe3df0fba7d8ca86d89
checksums.yaml.gz.sig CHANGED
@@ -1 +1,6 @@
1
- ;�Ac.���ng�r; �%�6,z{ma��j��~�zJJ����Ù4�r�c{���E�޵��i�lDp��:R=[WmJ����=#Mb�F���n�@�E��O�F���_FK�-��4Z�*wT��x�RH;�L�G�d�&��Y�遲�Lo&��7�Eב-$:�1�؏��U���)y��"v{��/nD*:%��� �^�kz�8�n�%m14Ȯ�I�c\�YGNu�軀�J���{����Xs���=�@V;���M���ʺ���p�p����������Kb<#�J6��}�k���X��~��Hc�b���Ҏ���=� ��L��N��Գ*k�Rw��������B������jd�F�`WXx����J
1
+ �u�3hBju���4/+H��wt��:��H!����q�!9v��@/Ad䳹na$�]x�vw���.�r��X �5��d����2���� ���ٽR�J�O�O�:�k\��{􊒔�1��T��;�� 1
2
+ �V��=���
3
+ I&�3�s���Q�.V")��������&c��E+��i�͢Ԧa��eyK�ո�D~}r�PM�ߞw��h��� u�{^�R?%�ۮi����
4
+ +I�����6`��h>�����y6l+��@�Iq=�/"���v�׵���� HN�>���s4G3�/�H
5
+ �2�� �?նͰ;��8O�W�߯�M�����E��
6
+ �~���7��
@@ -15,6 +15,7 @@ class XMLRowFinder
15
15
 
16
16
  @debug = debug
17
17
  doc = Rexle.new(s)
18
+ @doc2 = Document.new(s)
18
19
 
19
20
  a = []
20
21
 
@@ -23,51 +24,31 @@ class XMLRowFinder
23
24
  a << e.backtrack.to_xpath
24
25
  end
25
26
 
26
- a2 = a.select{ |e| a.count(e) > 1 }.map {|x| x.split('/')}.uniq
27
+ @to_a = a2 = a.map {|e| [a.count(e), e] }
28
+ xpath = a2.max_by(&:first).last
27
29
 
28
- # remove parent nodes on the same branch
29
- #
30
- a2.reject!.with_index do |x,i|
31
- next if i == a2.length-1
32
- x == a2[i+1][0..-2]
33
- end
30
+ a3 = xpath.split('/')
31
+ a4 = [xpath]
32
+ p1 = []
34
33
 
35
- # remove elements from rows which only exist once in the document
36
- #
37
- a3 = a2.map do |row|
38
- row.reject do |x|
39
- found = doc.root.xpath('//' + x)
40
- found.length < 2
41
- end
34
+ until (a3.length < 1) do
35
+ p1 << a3.pop; a4 << a3.join('/') + "[%s]" % p1.reverse.join('/')
42
36
  end
43
37
 
44
- # add parent node to the row as a reference for the xpath
45
- #
46
- a4 = a3.map.with_index do |row,i|
47
- a2[i][-(row.length+1)..-1]
38
+ a5 = a4[0..-2].map do |xpath2|
39
+ [XPath.match(@doc2, xpath2).length, xpath2]
48
40
  end
49
41
 
50
- # find the parent node attributes
51
- #
52
- @to_a = a4.map do |col|
53
-
54
- # currently using REXML for this XPath since there is a bug in
55
- # Rexle when attempting the following
56
- #
57
- doc2 = Document.new(s)
58
- xpath = "//%s[%s]" % [col[0], col[1..-1].join('/')]
59
- puts 'xpath: ' + xpath.inspect if @debug
60
- r = XPath.first(doc2, xpath)
61
- xpath_a = BacktrackXPath.new(r).to_xpath
62
-
63
- if col.length >= 3
64
- "%s/%s[%s]" % [xpath_a, col[1], col[2..-1].join('/')]
65
- else
66
- "%s/%s" % [xpath_a, col[1]]
67
- end
68
- end
69
-
42
+ @xpath = a5.reverse.detect {|num, xpath2| num > 1}.last
43
+
70
44
  end
71
45
 
72
- end
46
+ def rows()
47
+ XPath.match(@doc2, @xpath)
48
+ end
49
+
50
+ def to_xpath()
51
+ @xpath
52
+ end
73
53
 
54
+ end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xml_row_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -35,7 +35,7 @@ cert_chain:
35
35
  a/pAsvo0jT6QTnSB7xzsx8LSFDT5tfHKR9Dcn1Y3R06fsh02JvwxaSAMgDBM2aFb
36
36
  2A7/BQ1hD7SU82VTxB1gFIHl
37
37
  -----END CERTIFICATE-----
38
- date: 2022-01-21 00:00:00.000000000 Z
38
+ date: 2022-01-24 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: rexle
@@ -83,7 +83,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
83
83
  - !ruby/object:Gem::Version
84
84
  version: '0'
85
85
  requirements: []
86
- rubygems_version: 3.2.22
86
+ rubyforge_project:
87
+ rubygems_version: 2.7.10
87
88
  signing_key:
88
89
  specification_version: 4
89
90
  summary: Attempts to find repeating rows in XHTML and returns the associated xpath.
metadata.gz.sig CHANGED
Binary file