rexleparser 0.9.10 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/rexleparser.rb +183 -144
- data.tar.gz.sig +0 -0
- metadata +28 -28
- metadata.gz.sig +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a6291ec758a59d1737240656c502a96250c5fe6cdd230a452c220ba34c17f091
|
4
|
+
data.tar.gz: c9edbdc47b4e728598407ddd23da8d5622ed699069061742497d3017477e25bc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2665fb624f0b3fee4e4304c304c093900685e6708bfc62209fa54a0f0108e7e195844285fa7a677a91d83f0ee72c306ca8f785052cd7d6c51dba3ab1ea3950b9
|
7
|
+
data.tar.gz: 8c7d22a214d798f867d670c300d2b61c2235a17c437823ac7f5978dcb3c80015d3887dfc2f7f22b421d58d6b3b321eaa327ac931cb0448c06c1d1f71f70640ae
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/rexleparser.rb
CHANGED
@@ -7,20 +7,20 @@
|
|
7
7
|
class Attributes < Hash
|
8
8
|
|
9
9
|
class Value < String
|
10
|
-
|
10
|
+
|
11
11
|
def initialize(value)
|
12
12
|
#jr2020-04-30 super(value.gsub("'", '&apos;'))
|
13
13
|
super(value)
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def <(val2)
|
17
17
|
self.to_f < val2.to_f
|
18
|
-
end
|
19
|
-
|
18
|
+
end
|
19
|
+
|
20
20
|
def >(val2)
|
21
21
|
self.to_f > val2.to_f
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
def inspect()
|
25
25
|
super().gsub('&lt;','<',).gsub('&gt;','>').gsub('&pos;',"'")
|
26
26
|
end
|
@@ -28,13 +28,13 @@ class Attributes < Hash
|
|
28
28
|
def to_s(unescape: true)
|
29
29
|
unescape ? self.gsub('&amp;','&').gsub('&pos;',"'") : self
|
30
30
|
end
|
31
|
-
|
32
|
-
end
|
33
|
-
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
34
|
def initialize(h={})
|
35
35
|
super().merge! h
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
def []=(k,v)
|
39
39
|
super(k, k != :class ? Value.new(v) : v)
|
40
40
|
end
|
@@ -48,197 +48,236 @@ class Attributes < Hash
|
|
48
48
|
end
|
49
49
|
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
def merge(h)
|
53
53
|
|
54
|
-
h2 = h.inject({}) do |r, kv|
|
54
|
+
h2 = h.inject({}) do |r, kv|
|
55
55
|
k, raw_v = kv
|
56
56
|
v = raw_v.is_a?(String) ? Value.new(raw_v) : raw_v
|
57
|
-
r.merge(k => v)
|
57
|
+
r.merge(k => v)
|
58
58
|
end
|
59
|
-
|
59
|
+
|
60
60
|
super(h2)
|
61
|
-
|
61
|
+
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
|
+
|
65
66
|
class RexleParserException < Exception
|
66
67
|
end
|
67
68
|
|
68
69
|
class RexleParser
|
69
70
|
|
70
|
-
attr_reader :
|
71
|
+
attr_reader :stack
|
71
72
|
|
72
|
-
def initialize(
|
73
|
-
|
74
|
-
super()
|
75
|
-
s = raw_s.clone.strip
|
76
|
-
return if s.empty?
|
73
|
+
def initialize(raws, debug: false)
|
77
74
|
|
75
|
+
s = raws.strip
|
76
|
+
@debug = debug
|
77
|
+
@a = []
|
78
|
+
@stack = []
|
79
|
+
|
78
80
|
raw_xml, raw_instrctns = if s.lines.first =~ /<?xml/ then
|
79
|
-
s.split(/(
|
81
|
+
s.split(/(?<=\?>)/,2).reverse
|
80
82
|
else
|
81
83
|
s
|
82
84
|
end
|
85
|
+
puts 'raw_xml: ' + raw_xml.inspect if @debug
|
83
86
|
@instructions = raw_instrctns ? \
|
84
87
|
raw_instrctns.scan(/<\?([\w-]+) ([^\?]+)/) : []
|
85
88
|
@doctype = s.slice!(/<!DOCTYPE html>\n?/) if s.lines.first =~ /<\!DOCTYPE/
|
86
|
-
|
89
|
+
|
90
|
+
# scancom is run twice because we 1st check for comment tags and then cdata tags
|
91
|
+
@a = parse(scancom(scancom(raw_xml), type: :cdata)).flatten(1)
|
87
92
|
|
88
93
|
end
|
89
94
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
def scan_next(r, tagname)
|
94
|
-
|
95
|
-
j = tagname
|
96
|
-
|
97
|
-
if r[0] == '>' then
|
98
|
-
|
99
|
-
# end tag match
|
100
|
-
tag = r[/^>[^<]+</]
|
101
|
-
|
102
|
-
if tag[1][/[ \w"']/] and tag[-2] != '/' then
|
103
|
-
|
104
|
-
# is it the end tag to match the start tag?
|
105
|
-
tag = r.slice!(/^>[^<]+</)
|
106
|
-
end_tag = tag[/^>[^>]*#{j}<$/]
|
107
|
-
|
108
|
-
if end_tag then
|
109
|
-
|
110
|
-
j = nil
|
111
|
-
return [:end_tag, end_tag]
|
112
|
-
|
113
|
-
elsif tag[/^>[^>]*\w+<$/] then
|
114
|
-
# broken tag found
|
115
|
-
broken_tag = tag
|
116
|
-
return [:child, [nil, [], broken_tag]] if broken_tag
|
117
|
-
else
|
118
|
-
|
119
|
-
text, newtag = tag.sub('>',';tg&').split(/>/,2)
|
120
|
-
|
121
|
-
if newtag then
|
122
|
-
tag = newtag
|
123
|
-
r.prepend '>' + tag
|
124
|
-
end
|
125
|
-
|
126
|
-
return [:child, text]
|
127
|
-
end
|
128
|
-
elsif r[0,3] == '>--' then # comment tag found
|
95
|
+
def to_a()
|
96
|
+
@a
|
97
|
+
end
|
129
98
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
99
|
+
private
|
100
|
+
|
101
|
+
def ehead(raws)
|
102
|
+
|
103
|
+
s = raws.lstrip
|
104
|
+
puts '_s: ' + s.inspect if @debug
|
105
|
+
# fetch the element head
|
106
|
+
tag = s =~ /<[^>]+\/?>/
|
107
|
+
s2 = s[tag+1..-1]
|
108
|
+
tagb = s2 =~ />/
|
109
|
+
return unless tag
|
110
|
+
|
111
|
+
len = tagb+1-tag
|
112
|
+
|
113
|
+
if @debug then
|
114
|
+
puts 'ehead()/detail: ' + [tag, tagb, len, s[tag,len+1]].inspect
|
115
|
+
end
|
134
116
|
|
135
|
-
|
117
|
+
[s[tag,len+1], s[len+1..-1]]
|
136
118
|
|
137
|
-
|
119
|
+
end
|
138
120
|
|
139
|
-
|
121
|
+
def get_attributes(raw_attributes)
|
122
|
+
|
123
|
+
r1 = /([\w\-:\(\)]+\='[^']*)'/
|
124
|
+
r2 = /([\w\-:\(\)]+\="[^"]*)"/
|
125
|
+
|
126
|
+
r = raw_attributes.scan(/#{r1}|#{r2}/).map(&:compact)\
|
127
|
+
.flatten.inject(Attributes.new) do |r, x|
|
128
|
+
attr_name, raw_val = x.split(/=/,2)
|
129
|
+
val = attr_name != 'class' ? raw_val[1..-1] : raw_val[1..-1].split
|
130
|
+
r.merge(attr_name.to_sym => val)
|
131
|
+
end
|
140
132
|
|
141
|
-
|
142
|
-
|
143
|
-
s = r.slice!(0,i)
|
144
|
-
r.slice!(0,9)
|
133
|
+
return r
|
134
|
+
end
|
145
135
|
|
146
|
-
|
136
|
+
def parse(raws, a=[], cur=nil)
|
147
137
|
|
148
|
-
|
138
|
+
s = raws #.lstrip
|
139
|
+
|
140
|
+
if @debug then
|
141
|
+
puts '.parse() s: ' + s.inspect[0..600]
|
142
|
+
puts '.parse() a: ' + a.inspect[0..699]
|
143
|
+
puts '.parse() cur: ' + cur.inspect[0..799]
|
144
|
+
end
|
149
145
|
|
150
|
-
|
146
|
+
# if self-closing tag
|
147
|
+
if s =~ /^<[^<]+\/>/ then
|
151
148
|
|
152
|
-
|
149
|
+
tag = s[/^<[^<]+\/>/]
|
150
|
+
puts 'parse() self-closing/tag: ' + tag.inspect if @debug
|
151
|
+
tail = $'
|
152
|
+
|
153
|
+
if @debug then
|
154
|
+
puts 'parse() self-closing tag found'
|
155
|
+
puts 'parse()/tail: ' + tail.inspect
|
156
|
+
end
|
157
|
+
|
158
|
+
a2 = parsetag(tag)
|
159
|
+
puts '_a: ' + a.inspect if @debug
|
160
|
+
cur ? a.last << a2 : a << a2
|
161
|
+
|
162
|
+
parse(tail, a, cur)
|
163
|
+
|
164
|
+
# is it the head?
|
165
|
+
elsif (s =~ /^<[^\/>]+>/) == 0 then
|
166
|
+
|
167
|
+
puts 'parse()/head found' if @debug
|
168
|
+
|
169
|
+
tag, tail = ehead(s)
|
170
|
+
|
171
|
+
if @debug then
|
172
|
+
puts 'parse() tag: ' + tag.inspect
|
173
|
+
puts 'parse() tail: ' + tail.inspect
|
174
|
+
end
|
175
|
+
# add it to the list
|
176
|
+
a2 = parsetag(tag)
|
153
177
|
|
178
|
+
puts '_cur: ' + cur.inspect if @debug
|
179
|
+
if cur then
|
180
|
+
cur << a2
|
181
|
+
cur2 = cur.last
|
154
182
|
else
|
183
|
+
a << a2
|
184
|
+
cur2 = a.last
|
185
|
+
end
|
155
186
|
|
156
|
-
|
157
|
-
i = r =~ />(?:[\-\/"'\w]|\]\])/ # collect until a tag is found or a CDATA element
|
158
|
-
text = r.slice!(0,i)
|
159
|
-
|
160
|
-
return [:child, text] if text
|
161
|
-
|
162
|
-
end # end of tag match
|
163
|
-
|
164
|
-
else
|
165
|
-
|
166
|
-
# it's a text value
|
167
|
-
i = r =~ />(?:[\-\/"'\w]|\]\])/ # collect until a tag is found or a CDATA element
|
168
|
-
text = r.slice!(0,i)
|
169
|
-
|
170
|
-
return [:child, text] if text
|
171
|
-
end
|
172
|
-
end
|
187
|
+
puts '_a: ' + a.inspect if @debug
|
173
188
|
|
174
|
-
|
189
|
+
# add it to the stack
|
190
|
+
@stack.push cur2
|
175
191
|
|
176
|
-
|
177
|
-
|
178
|
-
|
192
|
+
parse(tail, a, cur2)
|
193
|
+
|
194
|
+
elsif (s =~ /^[^<]/) == 0
|
179
195
|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
196
|
+
puts 'parse() we have text!' if @debug
|
197
|
+
text = raws[/[^<]+/m] #
|
198
|
+
remaining = $'
|
199
|
+
|
200
|
+
if @debug then
|
201
|
+
puts 'parse() text: ' + text.inspect
|
202
|
+
puts 'cur tag: ' + cur[0].inspect
|
203
|
+
end
|
204
|
+
|
205
|
+
cur << if cur[0][0] == '!' then
|
206
|
+
text.gsub('&lt;','<').gsub('&gt;','>').gsub('&amp;','&')
|
207
|
+
else
|
208
|
+
text.gsub(/>/,'&gt;').gsub(/</, '&lt;')
|
209
|
+
end
|
184
210
|
|
185
|
-
|
211
|
+
puts 'remaining: ' + remaining.inspect if @debug
|
212
|
+
parse(remaining, a, cur) if remaining.length > 0
|
213
|
+
|
214
|
+
|
215
|
+
# is it a closing tag?
|
216
|
+
elsif s =~ /^\s?<\/\w+>/m
|
217
|
+
|
218
|
+
tail = s[/^\s*<\/\w+>(.*)/m,1]
|
219
|
+
|
220
|
+
if @debug then
|
221
|
+
puts 'parse()/closing tag ' + s[/^\s*<\/\w+>/].inspect
|
222
|
+
puts '>a: ' + a.inspect
|
223
|
+
end
|
224
|
+
|
225
|
+
@stack.pop
|
226
|
+
#a << []
|
227
|
+
parse(tail, a, @stack.last)
|
228
|
+
|
229
|
+
elsif s.empty? and @stack.length > 0
|
230
|
+
|
231
|
+
puts 'parse() no end tag!' if @debug
|
186
232
|
|
187
|
-
unless start_tag[1..-3][/\w+$/] then
|
188
|
-
raise RexleParserException, 'invalid closing tag found ' + \
|
189
|
-
start_tag.reverse + '; context: ' + r[0..120].reverse.inspect
|
190
233
|
end
|
191
234
|
|
192
|
-
|
235
|
+
return a
|
193
236
|
|
194
|
-
key, res = scan_next r, tagname
|
195
|
-
|
196
|
-
case key
|
197
|
-
when :end_tag
|
198
|
-
end_tag = res
|
199
|
-
r2 = [start_tag, children, end_tag]
|
200
|
-
end_tag = nil
|
201
|
-
|
202
|
-
return r2
|
203
|
-
when :child
|
204
|
-
children << res
|
205
|
-
when :newnode
|
206
|
-
children << parse_node(r, tagname)
|
207
|
-
else
|
208
|
-
break
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
[start_tag, children, end_tag]
|
213
237
|
end
|
214
238
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
239
|
+
# we parse the tag because it contains more than just the name it often contains attributes
|
240
|
+
#
|
241
|
+
def parsetag(s)
|
242
|
+
|
243
|
+
puts 'parsetag:' + s.inspect if @debug
|
244
|
+
rawtagname, rawattr = s[1..-2].sub(/\/$/,'').match(/^(\w+) *(.*)/)\
|
245
|
+
.values_at(1,2)
|
246
|
+
|
247
|
+
tagname = case rawtagname.to_sym
|
248
|
+
when :_comment
|
249
|
+
'!-'
|
250
|
+
when :_cdata
|
251
|
+
'!['
|
252
|
+
else
|
253
|
+
rawtagname
|
225
254
|
end
|
226
255
|
|
227
|
-
|
256
|
+
[tagname, get_attributes(rawattr)]
|
228
257
|
end
|
229
258
|
|
230
|
-
def
|
259
|
+
def scancom(s, type=:comment)
|
231
260
|
|
232
|
-
|
233
|
-
|
234
|
-
|
261
|
+
tag1 = ['<!--', '-->', 'comment', '<!--']
|
262
|
+
tag2 = ['<![CDATA[', '\]\]>', 'cdata', '\<!\[CDATA\[']
|
263
|
+
tag = type == :comment ? tag1 : tag2
|
235
264
|
|
236
|
-
tag
|
265
|
+
#puts 'tag: ' + tag.inspect
|
266
|
+
istart = s =~ /#{tag[3]}/
|
267
|
+
return s unless istart
|
237
268
|
|
238
|
-
|
269
|
+
iend = s =~ /#{tag[1]}/
|
270
|
+
comment ="<_%s>%s</_%s>" % [tag[2], s[istart+tag[0].length.. iend-1].gsub('&','&amp;').gsub('<','&lt;').gsub('>','&gt;'), tag[2]]
|
239
271
|
|
240
|
-
|
272
|
+
if @debug then
|
273
|
+
puts 'comment: ' + comment.inspect
|
274
|
+
# construct the new string
|
275
|
+
puts 'istart: ' + istart.inspect
|
276
|
+
end
|
277
|
+
|
278
|
+
s3 = s[0,istart].to_s + comment + s[iend+3..-1]
|
279
|
+
scancom(s3, type)
|
241
280
|
|
242
|
-
return [tag[/[!\-\w:\[]+/], get_attributes(tag), *r]
|
243
281
|
end
|
282
|
+
|
244
283
|
end
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexleparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -10,32 +10,33 @@ bindir: bin
|
|
10
10
|
cert_chain:
|
11
11
|
- |
|
12
12
|
-----BEGIN CERTIFICATE-----
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
13
|
+
MIIEljCCAv6gAwIBAgIBATANBgkqhkiG9w0BAQsFADBIMRIwEAYDVQQDDAlnZW1t
|
14
|
+
YXN0ZXIxHjAcBgoJkiaJk/IsZAEZFg5qYW1lc3JvYmVydHNvbjESMBAGCgmSJomT
|
15
|
+
8ixkARkWAmV1MB4XDTIzMDMwNzEyNDM0MVoXDTI0MDMwNjEyNDM0MVowSDESMBAG
|
16
|
+
A1UEAwwJZ2VtbWFzdGVyMR4wHAYKCZImiZPyLGQBGRYOamFtZXNyb2JlcnRzb24x
|
17
|
+
EjAQBgoJkiaJk/IsZAEZFgJldTCCAaIwDQYJKoZIhvcNAQEBBQADggGPADCCAYoC
|
18
|
+
ggGBAMXdVvebuQa1+janwRx6yjABUKs2WSd6ns81LBol0KgH8Lmjj5CdIJHK/IFZ
|
19
|
+
pcjvbJCSNJS9eREO4RnHkJTUpYE6xgTboCsSMdTpJU3MK2Y+PHXQu5YJHBQQBWSe
|
20
|
+
LORpuKuhQuhU+oQgxnuszksIO1UBU+Xh0D5dntbWpiFBGPzTctoBTtJqBdClZwXc
|
21
|
+
s1mAmXhAkeK2hmT0Rw/IY2CqZAMeMbrVZBaqazYvqXvfDisRPMMZVZMz9al3w6IE
|
22
|
+
L9E4tDbU1sExjUgVGB+BIV6SIG5kYrOzpDKnZXhvPbmUR08iZeTe0IpUIFMIYPIy
|
23
|
+
kPJxO45OaxLwnabV+jC38P2CV4Pbx6dij/M/mWisD/az4kzmw1kGUMGJiPIn/XRX
|
24
|
+
mLOOxuCQxHDts+7tvD+/wTtSIxklsvVKz49QH1ybNrOdoYQuB8qNnLPFzKFr+5SC
|
25
|
+
ojuaJ45mf/Uv3Orps8LXj8WmOBvbJWC1/Lglhy+hhGV/7gh1EKVMpx6AwKcJ1+85
|
26
|
+
R19PMQIDAQABo4GKMIGHMAkGA1UdEwQCMAAwCwYDVR0PBAQDAgSwMB0GA1UdDgQW
|
27
|
+
BBQiPSrne0dHxLynj/1izBmEwv/lkTAmBgNVHREEHzAdgRtnZW1tYXN0ZXJAamFt
|
28
|
+
ZXNyb2JlcnRzb24uZXUwJgYDVR0SBB8wHYEbZ2VtbWFzdGVyQGphbWVzcm9iZXJ0
|
29
|
+
c29uLmV1MA0GCSqGSIb3DQEBCwUAA4IBgQBWIQG28MQXuVSHsAw/rQuaE1FpOmz0
|
30
|
+
AXWhHK1oDWxkoPTBIoLil2M2PI3htcBBRLUzyli9XFvqjzuO+J8LpWqs2iddxjXP
|
31
|
+
3xtUg0h+0urVRk00xwsnPLppwT7VxvycQifN31En8rMEzd24V5FKkn90brdokVQC
|
32
|
+
5aL/9LD3S5k3t4AyUcGeOFuU+k87lEz8bzKk2wjOjzpgxjNyqeK6h0iFeA94rhnp
|
33
|
+
HPaebr7ytgAR3dKU/Zr/gmZdQroli86LaOqGK1AJQ9E1RFBKwKluNa27dR3VyqBA
|
34
|
+
VG1Z/9QNxm0ivqKr7samwkNUGiql+s4CPZndbJD/hmDDdmCtYf1mywCAUOh+DrfU
|
35
|
+
iKwWhO5Qmo/RUOvr0a4KtP5i0N7qY1LFMSCpfCRTS9zTlfTIjs1ipAnWhLcqp5Js
|
36
|
+
NMSYHad41dFYLrVyE7mdZdIVlFqIdG+V1x7iY+zPpg7FS/8EUnK5zUxMC0YoLeK2
|
37
|
+
hcHycb1NL9Ujb7onXEop5Dobym6xO6V+yBY=
|
37
38
|
-----END CERTIFICATE-----
|
38
|
-
date:
|
39
|
+
date: 2023-03-07 00:00:00.000000000 Z
|
39
40
|
dependencies: []
|
40
41
|
description:
|
41
42
|
email: digital.robertson@gmail.com
|
@@ -63,8 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
63
64
|
- !ruby/object:Gem::Version
|
64
65
|
version: '0'
|
65
66
|
requirements: []
|
66
|
-
|
67
|
-
rubygems_version: 2.7.10
|
67
|
+
rubygems_version: 3.4.6
|
68
68
|
signing_key:
|
69
69
|
specification_version: 4
|
70
70
|
summary: Rexleparser is an XML parser used by the Rexle gem
|
metadata.gz.sig
CHANGED
@@ -1,2 +1,3 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
,����<�a��1�����$n�]��ᆧAWεp����J���z{�0aM��p�٥tP�u���,k���l�ygܖ|8��P�t"}����ej
|
2
|
+
+֢��PJ{ݍ��}ׄ��vj� �a�m,�G��]��ӡ� ���h&r����C*�r��3I=�i��O�ɂ�7�]~��?����CH��Z��j���*�9���ե�ӱM��R+��a����n�3ͣJ��Lm�,��mU&���2>@G���a�OQ�0�b�&��%�.��
|
3
|
+
|d�!lDO#�3{�oV��+�(��7<j��pc�"�z���֤�ϗ'�m�si�0�}��y��eP1�:���Dcյ���6=�R,�s�{�7�d}�jj�h�f�j��
|