xml_row_finder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: cfc152e03ec51b624550b4b4bf031eae3e7205c42e1dccbdd4d45219d684860d
4
+ data.tar.gz: 77be8ae4da89aded4ec73a653e9fe1e3d9815a144a29a37a73158b97174da984
5
+ SHA512:
6
+ metadata.gz: e83c208f4dfeac7827b9f8ca453baba4f19ff4be9d4341bbd15f46695948afd7af42ae5ad0c71efb994b58832c46b9cf836fcb2d115ffb126a49ebfc26065772
7
+ data.tar.gz: 24335ada2416257aa9caebd29b687562e92fe2d0ca2ac71f59824448c98eee018e15f95ee60352ed326440a50cb5445d3fb5bd4cbb1a0c47eed58ffc85ff12ec
checksums.yaml.gz.sig ADDED
@@ -0,0 +1 @@
1
+ ;�Ac.���n�g�r; �%�6,z{ma��j��~�zJJ����Ù4�r�c{���E�޵��i�lDp��:R=[W�mJ����=#M�b�F���n�@�E��O�F���_FK�-��4Z�*wT��x�RH;�L�G�d�&��Y�遲�Lo&��7�Eב-$:�1�؏��U���)y��"�v{��/nD*:%��� �^�kz�8�n�%m14Ȯ�I�c\�YGNu�軀�J���{����Xs���=�@V;���M���ʺ���p�p����������Kb<#�J6��}�k���X��~��Hc�b���Ҏ���=� ��L��N��Գ*k�Rw��������B������jd�F�`WXx����J
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: xml_row_finder.rb
4
+
5
+ require 'rexle'
6
+ require 'rexml'
7
+ include REXML
8
+
9
+
10
# Attempts to find repeating rows in an XML/XHTML string and builds one XPath
# for each group of repeating rows it detects.
class XMLRowFinder

  # @return [Array<String>] the XPath strings built by #initialize
  attr_reader :to_a

  # Analyses the markup straight away; the result is read back with #to_a.
  #
  # @param s [String] markup passed to both Rexle.new and REXML::Document.new
  # @param debug [Boolean] when true, every lookup XPath is printed with puts
  def initialize(s, debug: false)

    @debug = debug
    doc = Rexle.new(s)

    rows = drop_parent_rows(repeated_paths(doc))

    # The same step name turns up in many rows, so remember how many nodes
    # each '//name' query returned instead of querying again.
    hits = Hash.new { |h, step| h[step] = doc.root.xpath('//' + step).length }

    # Count the steps of each row which match at least two nodes in the
    # document, then keep that many trailing steps of the path plus one more
    # in front of them; the extra leading step is the parent node used as the
    # reference for the xpath.
    # NOTE(review): the slice is nil when every step of a path repeats, which
    # makes row_xpath raise - confirm whether that case can occur.
    cols = rows.map do |steps|
      repeating = steps.reject { |step| hits[step] < 2 }
      steps[-(repeating.length + 1)..-1]
    end

    # currently using REXML for the parent lookup since there is a bug in
    # Rexle when attempting that XPath. The document is parsed once here
    # rather than once per row.
    doc2 = REXML::Document.new(s)

    @to_a = cols.map { |col| row_xpath(doc2, col) }

  end

  private

  # Returns the xpath (split on '/') of every element path which is yielded
  # more than once by each_recursive, without duplicates, in document order.
  def repeated_paths(doc)

    paths = []

    doc.root.each_recursive do |e|
      # presumably clears the attributes so they stay out of the generated
      # xpath - confirm against the Rexle API
      e.attributes.delete
      paths << e.backtrack.to_xpath
    end

    # single counting pass instead of calling Array#count for every element
    counts = Hash.new(0)
    paths.each { |path| counts[path] += 1 }

    paths.select { |path| counts[path] > 1 }.map { |path| path.split('/') }.uniq

  end

  # Removes a row when the row directly after it is its child on the same
  # branch. Builds a new array, so the look-ahead never reads from an array
  # that is being modified.
  def drop_parent_rows(rows)

    rows.reject.with_index do |steps, i|
      successor = rows[i + 1]
      successor && steps == successor[0..-2]
    end

  end

  # Finds the first node matching the parent step (col[0]) which contains the
  # remaining steps, and returns the row xpath expressed relative to it.
  def row_xpath(doc2, col)

    xpath = "//%s[%s]" % [col[0], col[1..-1].join('/')]
    puts 'xpath: ' + xpath.inspect if @debug
    r = REXML::XPath.first(doc2, xpath)
    xpath_a = BacktrackXPath.new(r).to_xpath

    if col.length >= 3
      "%s/%s[%s]" % [xpath_a, col[1], col[2..-1].join('/')]
    else
      "%s/%s" % [xpath_a, col[1]]
    end

  end

end
73
+
data.tar.gz.sig ADDED
Binary file
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xml_row_finder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMTIxMjAxOTQyWhcN
15
+ MjMwMTIxMjAxOTQyWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQC+EN7D
17
+ 60MdhiAKWBY7jv7itPYA77Jw8h9gWR1Bh3w684JRc4UHIl9d5vjC3h1mdslJq3iQ
18
+ RZzllFz8os9f5d9CFjbIsbRW/oTshuYcAgVJCPIXq3+1GjTTasFHztvYI/y5tQfe
19
+ CAMCaH//0u1c+K/XLHG1r5UrMIO0vsaDV3jtYEsHpTxFOg/b1922sIGsw2O35+hL
20
+ 0Pjxlvl/EG9jIYlf+XRdNJAYNTu52YgFZ+uzAj2T8xxxHb7TuEtt3l2y6FKGiSpz
21
+ 8qJzzfPfYcsbpbpXpbv2XYk+gSGgBJTOgp5KWu2/IdZq33EgVBvQYA6xov9hy5Ls
22
+ mvr0xm5zeeC5CjRPwBK1ZxCQ5Nntf4fF57qGeg9YhlaH2sRtQXHrAevQ6Cxax0oJ
23
+ 3lOgU0RBH7zz17ItVFxb7Bd9teOsrZQfwAemV6WQPsfnYPpI3uKR5OLZlUdVlz8V
24
+ xnV4maNpknMfarx8f6s9Hj8AYB66K6ro6z1GzsQKj9QYmH+8VqlaK/rjSD8CAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUNuKD1Egp
26
+ 48/WyUVF8QxmbWYzP+gwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEAdTGz4gEgE4eU7tGyhFBeLvQv7RM4arJXFsxxISap
29
+ HNr2CyeckvJAkmWWNRQS/8G1QRxMMEIJCJw1mlKAUMYR7gAs8AiqdTwC/Q0WNr3/
30
+ AyUJaDXJ37SPT429sUNoJ9n/0/ChqwH6A4xU/S+owwNtvzBzy4S34vITDy1F5yeF
31
+ Uuy0aMjAqZGCQNOzVe0lbC0QCxm1OTRdZ4hGFdn5M0lvhPZXjWpLU2Ha5rnFyDyI
32
+ pluXqAxZwcOmUS3whRdEy20CRSIxKnznEpikm9Xc92RU5k/xhEsxzQgFA3jkGjs6
33
+ 52cQoCLdtaNELCj3WqC+q9TCvPY8j3JRJqCz37+Lp/asof/5/OlX1k8iVUMWRACw
34
+ qPdaJGkE6iXpro2Nfvj/069UeXNUGSlwROMB/YoDkbamR/+UFIkBXdTQmRnoos6z
35
+ a/pAsvo0jT6QTnSB7xzsx8LSFDT5tfHKR9Dcn1Y3R06fsh02JvwxaSAMgDBM2aFb
36
+ 2A7/BQ1hD7SU82VTxB1gFIHl
37
+ -----END CERTIFICATE-----
38
+ date: 2022-01-21 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: rexle
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.5'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 1.5.14
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '1.5'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 1.5.14
60
+ description:
61
+ email: digital.robertson@gmail.com
62
+ executables: []
63
+ extensions: []
64
+ extra_rdoc_files: []
65
+ files:
66
+ - lib/xml_row_finder.rb
67
+ homepage: https://github.com/jrobertson/xml_row_finder
68
+ licenses:
69
+ - MIT
70
+ metadata: {}
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubygems_version: 3.2.22
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: Attempts to find repeating rows in XHTML and returns the associated xpath.
90
+ test_files: []
metadata.gz.sig ADDED
Binary file