xml_row_finder 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: cfc152e03ec51b624550b4b4bf031eae3e7205c42e1dccbdd4d45219d684860d
4
+ data.tar.gz: 77be8ae4da89aded4ec73a653e9fe1e3d9815a144a29a37a73158b97174da984
5
+ SHA512:
6
+ metadata.gz: e83c208f4dfeac7827b9f8ca453baba4f19ff4be9d4341bbd15f46695948afd7af42ae5ad0c71efb994b58832c46b9cf836fcb2d115ffb126a49ebfc26065772
7
+ data.tar.gz: 24335ada2416257aa9caebd29b687562e92fe2d0ca2ac71f59824448c98eee018e15f95ee60352ed326440a50cb5445d3fb5bd4cbb1a0c47eed58ffc85ff12ec
checksums.yaml.gz.sig ADDED
@@ -0,0 +1 @@
1
+ ;�Ac.���n�g�r; �%�6,z{ma��j��~�zJJ����Ù4�r�c{���E�޵��i�lDp��:R=[W�mJ����=#M�b�F���n�@�E��O�F���_FK�-��4Z�*wT��x�RH;�L�G�d�&��Y�遲�Lo&��7�Eב-$:�1�؏��U���)y��"�v{��/nD*:%��� �^�kz�8�n�%m14Ȯ�I�c\�YGNu�軀�J���{����Xs���=�@V;���M���ʺ���p�p����������Kb<#�J6��}�k���X��~��Hc�b���Ҏ���=� ��L��N��Գ*k�Rw��������B������jd�F�`WXx����J
@@ -0,0 +1,73 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # file: xml_row_finder.rb
4
+
5
+ require 'rexle'
6
+ require 'rexml'
7
+ include REXML
8
+
9
+
10
# Looks for element paths that occur more than once in an XML/XHTML string
# (i.e. candidate "rows") and builds an XPath string for each of them.
#
# The work is done in the constructor; the result is read through #to_a.
#
class XMLRowFinder

  # @return [Array<String>] one XPath string per repeating row candidate
  attr_reader :to_a

  # @param s [String] the XML/XHTML source; it is handed to Rexle.new and,
  #   when at least one candidate is found, to REXML::Document.new as well
  # @param debug [Boolean] when true, each intermediate XPath is printed
  #   with puts
  def initialize(s, debug: false)

    @debug = debug
    doc = Rexle.new(s)

    paths = []

    doc.root.each_recursive do |e|
      # NOTE(review): presumably strips the attributes so that the backtracked
      # xpath is not attribute-qualified -- confirm against Rexle
      e.attributes.delete
      paths << e.backtrack.to_xpath
    end

    # count each path once up front rather than rescanning the whole list
    # for every element
    counts = Hash.new(0)
    paths.each { |path| counts[path] += 1 }

    repeated = paths.select { |path| counts[path] > 1 }
                    .map { |path| path.split('/') }
                    .uniq

    # remove parent nodes on the same branch
    #
    # A path is dropped when the entry that follows it is its direct child.
    # A non-mutating reject is used so that the look-ahead always reads the
    # original, unfiltered list.
    #
    a2 = repeated.reject.with_index do |segments, i|
      following = repeated[i + 1]
      following && segments == following[0..-2]
    end

    # remove elements from rows which only exist once in the document
    #
    a3 = a2.map do |row|
      row.reject do |x|
        found = doc.root.xpath('//' + x)
        found.length < 2
      end
    end

    # add parent node to the row as a reference for the xpath
    #
    a4 = a3.map.with_index do |row, i|
      a2[i][-(row.length + 1)..-1]
    end

    # currently using REXML for the XPath below since there is a bug in
    # Rexle when attempting it
    #
    # The REXML document is parsed lazily and only once: the source string
    # does not change between rows, and nothing is parsed when there are no
    # rows to resolve.
    #
    doc2 = nil

    # find the parent node attributes
    #
    @to_a = a4.map do |col|

      doc2 ||= REXML::Document.new(s)
      xpath = "//%s[%s]" % [col[0], col[1..-1].join('/')]
      puts 'xpath: ' + xpath.inspect if @debug
      r = REXML::XPath.first(doc2, xpath)
      xpath_a = BacktrackXPath.new(r).to_xpath

      if col.length >= 3
        "%s/%s[%s]" % [xpath_a, col[1], col[2..-1].join('/')]
      else
        "%s/%s" % [xpath_a, col[1]]
      end
    end

  end

end
73
+
data.tar.gz.sig ADDED
Binary file
metadata ADDED
@@ -0,0 +1,90 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xml_row_finder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - James Robertson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain:
11
+ - |
12
+ -----BEGIN CERTIFICATE-----
13
+ MIIEXjCCAsagAwIBAgIBATANBgkqhkiG9w0BAQsFADAsMSowKAYDVQQDDCFnZW1t
14
+ YXN0ZXIvREM9amFtZXNyb2JlcnRzb24vREM9ZXUwHhcNMjIwMTIxMjAxOTQyWhcN
15
+ MjMwMTIxMjAxOTQyWjAsMSowKAYDVQQDDCFnZW1tYXN0ZXIvREM9amFtZXNyb2Jl
16
+ cnRzb24vREM9ZXUwggGiMA0GCSqGSIb3DQEBAQUAA4IBjwAwggGKAoIBgQC+EN7D
17
+ 60MdhiAKWBY7jv7itPYA77Jw8h9gWR1Bh3w684JRc4UHIl9d5vjC3h1mdslJq3iQ
18
+ RZzllFz8os9f5d9CFjbIsbRW/oTshuYcAgVJCPIXq3+1GjTTasFHztvYI/y5tQfe
19
+ CAMCaH//0u1c+K/XLHG1r5UrMIO0vsaDV3jtYEsHpTxFOg/b1922sIGsw2O35+hL
20
+ 0Pjxlvl/EG9jIYlf+XRdNJAYNTu52YgFZ+uzAj2T8xxxHb7TuEtt3l2y6FKGiSpz
21
+ 8qJzzfPfYcsbpbpXpbv2XYk+gSGgBJTOgp5KWu2/IdZq33EgVBvQYA6xov9hy5Ls
22
+ mvr0xm5zeeC5CjRPwBK1ZxCQ5Nntf4fF57qGeg9YhlaH2sRtQXHrAevQ6Cxax0oJ
23
+ 3lOgU0RBH7zz17ItVFxb7Bd9teOsrZQfwAemV6WQPsfnYPpI3uKR5OLZlUdVlz8V
24
+ xnV4maNpknMfarx8f6s9Hj8AYB66K6ro6z1GzsQKj9QYmH+8VqlaK/rjSD8CAwEA
25
+ AaOBijCBhzAJBgNVHRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQUNuKD1Egp
26
+ 48/WyUVF8QxmbWYzP+gwJgYDVR0RBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
27
+ c29uLmV1MCYGA1UdEgQfMB2BG2dlbW1hc3RlckBqYW1lc3JvYmVydHNvbi5ldTAN
28
+ BgkqhkiG9w0BAQsFAAOCAYEAdTGz4gEgE4eU7tGyhFBeLvQv7RM4arJXFsxxISap
29
+ HNr2CyeckvJAkmWWNRQS/8G1QRxMMEIJCJw1mlKAUMYR7gAs8AiqdTwC/Q0WNr3/
30
+ AyUJaDXJ37SPT429sUNoJ9n/0/ChqwH6A4xU/S+owwNtvzBzy4S34vITDy1F5yeF
31
+ Uuy0aMjAqZGCQNOzVe0lbC0QCxm1OTRdZ4hGFdn5M0lvhPZXjWpLU2Ha5rnFyDyI
32
+ pluXqAxZwcOmUS3whRdEy20CRSIxKnznEpikm9Xc92RU5k/xhEsxzQgFA3jkGjs6
33
+ 52cQoCLdtaNELCj3WqC+q9TCvPY8j3JRJqCz37+Lp/asof/5/OlX1k8iVUMWRACw
34
+ qPdaJGkE6iXpro2Nfvj/069UeXNUGSlwROMB/YoDkbamR/+UFIkBXdTQmRnoos6z
35
+ a/pAsvo0jT6QTnSB7xzsx8LSFDT5tfHKR9Dcn1Y3R06fsh02JvwxaSAMgDBM2aFb
36
+ 2A7/BQ1hD7SU82VTxB1gFIHl
37
+ -----END CERTIFICATE-----
38
+ date: 2022-01-21 00:00:00.000000000 Z
39
+ dependencies:
40
+ - !ruby/object:Gem::Dependency
41
+ name: rexle
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.5'
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 1.5.14
50
+ type: :runtime
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - "~>"
55
+ - !ruby/object:Gem::Version
56
+ version: '1.5'
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: 1.5.14
60
+ description:
61
+ email: digital.robertson@gmail.com
62
+ executables: []
63
+ extensions: []
64
+ extra_rdoc_files: []
65
+ files:
66
+ - lib/xml_row_finder.rb
67
+ homepage: https://github.com/jrobertson/xml_row_finder
68
+ licenses:
69
+ - MIT
70
+ metadata: {}
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements: []
86
+ rubygems_version: 3.2.22
87
+ signing_key:
88
+ specification_version: 4
89
+ summary: Attempts to find repeating rows in XHTML and returns the associated xpath.
90
+ test_files: []
metadata.gz.sig ADDED
Binary file