rexleparser 1.0.0 → 1.0.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/rexleparser.rb +127 -166
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a98589dfd93cc2c5a4946f9dc5a2bc5418bd30cbfc4662330d7698bf13b1e107
|
4
|
+
data.tar.gz: 92a35568634aa47b9032aeb26cc193830adee7ba0c3d48dc2a53458119671791
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1132a312974ad5eaef5b1267bf18f35e0228138a3247a867e6e9de4f366f8c654b6f24220e68922cca90698520c07bf45cdc52d44390fedf50e8c97df3ac0666
|
7
|
+
data.tar.gz: 211dddd0cd0411e4311541dd88e8742dbb1f32a2d5cdd6a25b48d90ef25ffd4a4bab7525180c5bdfe7a0b8afb1610e4b532e2fefec4980acae2d22dc160b0602
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/rexleparser.rb
CHANGED
@@ -62,222 +62,183 @@ class Attributes < Hash
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
|
-
|
66
65
|
class RexleParserException < Exception
|
67
66
|
end
|
68
67
|
|
69
68
|
class RexleParser
|
70
69
|
|
71
|
-
attr_reader :
|
70
|
+
attr_reader :instructions, :doctype, :to_a
|
72
71
|
|
73
|
-
def initialize(
|
72
|
+
def initialize(raw_s)
|
74
73
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
@stack = []
|
74
|
+
super()
|
75
|
+
s = raw_s.clone.strip
|
76
|
+
return if s.empty?
|
79
77
|
|
80
78
|
raw_xml, raw_instrctns = if s.lines.first =~ /<?xml/ then
|
81
|
-
s.split(/(
|
79
|
+
s.split(/(?=\?>\s*<\w)/,2).reverse
|
82
80
|
else
|
83
81
|
s
|
84
82
|
end
|
85
|
-
puts 'raw_xml: ' + raw_xml.inspect if @debug
|
86
83
|
@instructions = raw_instrctns ? \
|
87
84
|
raw_instrctns.scan(/<\?([\w-]+) ([^\?]+)/) : []
|
88
85
|
@doctype = s.slice!(/<!DOCTYPE html>\n?/) if s.lines.first =~ /<\!DOCTYPE/
|
89
|
-
|
90
|
-
# scancom is run twice because we 1st check for comment tags and then cdata tags
|
91
|
-
@a = parse(scancom(scancom(raw_xml), type: :cdata)).flatten(1)
|
92
|
-
|
93
|
-
end
|
86
|
+
@to_a = reverse(parse_node(raw_xml.strip.reverse))
|
94
87
|
|
95
|
-
def to_a()
|
96
|
-
@a
|
97
88
|
end
|
98
|
-
|
89
|
+
|
99
90
|
private
|
91
|
+
|
100
92
|
|
101
|
-
def
|
93
|
+
def scan_next(r, tagname)
|
102
94
|
|
103
|
-
|
104
|
-
puts '_s: ' + s.inspect if @debug
|
105
|
-
# fetch the element head
|
106
|
-
tag = s =~ /<[^>]+\/?>/
|
107
|
-
s2 = s[tag+1..-1]
|
108
|
-
tagb = s2 =~ />/
|
109
|
-
return unless tag
|
95
|
+
j = tagname
|
110
96
|
|
111
|
-
|
112
|
-
|
113
|
-
if @debug then
|
114
|
-
puts 'ehead()/detail: ' + [tag, tagb, len, s[tag,len+1]].inspect
|
115
|
-
end
|
97
|
+
if r[0] == '>' then
|
116
98
|
|
117
|
-
|
99
|
+
# end tag match
|
100
|
+
tag = r[/^>[^<]+</]
|
101
|
+
|
102
|
+
if tag[1][/[ \w"']/] and tag[-2] != '/' then
|
118
103
|
|
119
|
-
|
104
|
+
# is it the end tag to match the start tag?
|
105
|
+
tag = r.slice!(/^>[^<]+</)
|
106
|
+
end_tag = tag[/^>[^>]*#{j}<$/]
|
120
107
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
108
|
+
if end_tag then
|
109
|
+
|
110
|
+
j = nil
|
111
|
+
return [:end_tag, end_tag]
|
112
|
+
|
113
|
+
elsif tag[/^>[^>]*\w+<$/] then
|
114
|
+
# broken tag found
|
115
|
+
broken_tag = tag
|
116
|
+
return [:child, [nil, [], broken_tag]] if broken_tag
|
117
|
+
else
|
118
|
+
|
119
|
+
text, newtag = tag.sub('>',';tg&').split(/>/,2)
|
120
|
+
|
121
|
+
if newtag then
|
122
|
+
tag = newtag
|
123
|
+
r.prepend '>' + tag
|
124
|
+
end
|
125
|
+
|
126
|
+
return [:child, text]
|
127
|
+
end
|
128
|
+
elsif r[0,3] == '>--' then # comment tag found
|
129
|
+
|
130
|
+
r.slice!(0,3)
|
131
|
+
i = r =~ /(\-\-!<)/
|
132
|
+
s = r.slice!(0,i)
|
133
|
+
r.slice!(0,4)
|
132
134
|
|
133
|
-
|
134
|
-
|
135
|
+
tagname, content = ['-!',s]
|
136
|
+
|
137
|
+
return [:child, [">#{tagname}<", [content], ">#{tagname}/<"]]
|
138
|
+
|
139
|
+
elsif r[0,3] == '>]]' then # CDATA tag found
|
135
140
|
|
136
|
-
|
141
|
+
r.slice!(0,3)
|
142
|
+
i = r =~ /(\[ATADC\[!<)/
|
143
|
+
s = r.slice!(0,i)
|
144
|
+
r.slice!(0,9)
|
137
145
|
|
138
|
-
|
139
|
-
|
140
|
-
if @debug then
|
141
|
-
puts '.parse() s: ' + s.inspect[0..600]
|
142
|
-
puts '.parse() a: ' + a.inspect[0..699]
|
143
|
-
puts '.parse() cur: ' + cur.inspect[0..799]
|
144
|
-
end
|
146
|
+
tagname, content = ['[!',s]
|
145
147
|
|
146
|
-
|
147
|
-
|
148
|
+
return [:child, [">#{tagname}<", [content], ">#{tagname}/<"]]
|
149
|
+
|
150
|
+
elsif tag[/>\/|\/<$/] or tag[/^>.*[\w!]+\/<$/] then
|
151
|
+
|
152
|
+
return [:newnode]
|
153
|
+
|
154
|
+
else
|
155
|
+
|
156
|
+
r.sub!('>',';tg&')
|
157
|
+
i = r =~ />(?:[\-\/"'\w]|\]\])/ # collect until a tag is found or a CDATA element
|
158
|
+
text = r.slice!(0,i)
|
148
159
|
|
149
|
-
|
150
|
-
puts 'parse() self-closing/tag: ' + tag.inspect if @debug
|
151
|
-
tail = $'
|
152
|
-
|
153
|
-
if @debug then
|
154
|
-
puts 'parse() self-closing tag found'
|
155
|
-
puts 'parse()/tail: ' + tail.inspect
|
156
|
-
end
|
157
|
-
|
158
|
-
a2 = parsetag(tag)
|
159
|
-
puts '_a: ' + a.inspect if @debug
|
160
|
-
cur ? a.last << a2 : a << a2
|
161
|
-
|
162
|
-
parse(tail, a, cur)
|
160
|
+
return [:child, text] if text
|
163
161
|
|
164
|
-
|
165
|
-
elsif (s =~ /^<[^\/>]+>/) == 0 then
|
162
|
+
end # end of tag match
|
166
163
|
|
167
|
-
|
164
|
+
else
|
168
165
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
puts 'parse() tag: ' + tag.inspect
|
173
|
-
puts 'parse() tail: ' + tail.inspect
|
174
|
-
end
|
175
|
-
# add it to the list
|
176
|
-
a2 = parsetag(tag)
|
166
|
+
# it's a text value
|
167
|
+
i = r =~ />(?:[\-\/"'\w]|\]\])/ # collect until a tag is found or a CDATA element
|
168
|
+
text = r.slice!(0,i)
|
177
169
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
170
|
+
return [:child, text] if text
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
def parse_node(r, j=nil)
|
175
|
+
|
176
|
+
return unless r.length > 0
|
177
|
+
tag = r.slice!(/^>[^<]+</) if (r =~ /^>[^<]+</) == 0
|
178
|
+
tagname = tag[/([\w!:]+)\/?<$/,1]
|
186
179
|
|
187
|
-
|
180
|
+
# self closing tag?
|
181
|
+
if tag[/^>\/.*#{tagname}<$/m] then
|
182
|
+
return [">/#{tagname}<", [], "#{tag.sub(/>\//,'>')}"]
|
183
|
+
end
|
188
184
|
|
189
|
-
|
190
|
-
@stack.push cur2
|
185
|
+
start_tag, children, end_tag = tag, [], nil
|
191
186
|
|
192
|
-
|
193
|
-
|
194
|
-
|
187
|
+
unless start_tag[1..-3][/\w+$/] then
|
188
|
+
raise RexleParserException, 'invalid closing tag found ' + \
|
189
|
+
start_tag.reverse + '; context: ' + r[0..120].reverse.inspect
|
190
|
+
end
|
195
191
|
|
196
|
-
|
197
|
-
text = raws[/[^<]+/m] #
|
198
|
-
remaining = $'
|
192
|
+
until end_tag do
|
199
193
|
|
200
|
-
|
201
|
-
puts 'parse() text: ' + text.inspect
|
202
|
-
puts 'cur tag: ' + cur[0].inspect
|
203
|
-
end
|
194
|
+
key, res = scan_next r, tagname
|
204
195
|
|
205
|
-
|
206
|
-
|
196
|
+
case key
|
197
|
+
when :end_tag
|
198
|
+
end_tag = res
|
199
|
+
r2 = [start_tag, children, end_tag]
|
200
|
+
end_tag = nil
|
201
|
+
|
202
|
+
return r2
|
203
|
+
when :child
|
204
|
+
children << res
|
205
|
+
when :newnode
|
206
|
+
children << parse_node(r, tagname)
|
207
207
|
else
|
208
|
-
|
208
|
+
break
|
209
209
|
end
|
210
|
-
|
211
|
-
puts 'remaining: ' + remaining.inspect if @debug
|
212
|
-
parse(remaining, a, cur) if remaining.length > 0
|
213
|
-
|
214
|
-
|
215
|
-
# is it a closing tag?
|
216
|
-
elsif s =~ /^\s?<\/\w+>/m
|
217
|
-
|
218
|
-
tail = s[/^\s*<\/\w+>(.*)/m,1]
|
219
|
-
|
220
|
-
if @debug then
|
221
|
-
puts 'parse()/closing tag ' + s[/^\s*<\/\w+>/].inspect
|
222
|
-
puts '>a: ' + a.inspect
|
223
|
-
end
|
224
|
-
|
225
|
-
@stack.pop
|
226
|
-
#a << []
|
227
|
-
parse(tail, a, @stack.last)
|
228
|
-
|
229
|
-
elsif s.empty? and @stack.length > 0
|
230
|
-
|
231
|
-
puts 'parse() no end tag!' if @debug
|
232
|
-
|
233
210
|
end
|
234
211
|
|
235
|
-
|
236
|
-
|
212
|
+
[start_tag, children, end_tag]
|
237
213
|
end
|
238
214
|
|
239
|
-
|
240
|
-
#
|
241
|
-
def parsetag(s)
|
215
|
+
def get_attributes(raw_attributes)
|
242
216
|
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
'
|
250
|
-
|
251
|
-
'!['
|
252
|
-
else
|
253
|
-
rawtagname
|
217
|
+
r1 = /([\w\-:\(\)]+\='[^']*)'/
|
218
|
+
r2 = /([\w\-:\(\)]+\="[^"]*)"/
|
219
|
+
|
220
|
+
r = raw_attributes.scan(/#{r1}|#{r2}/).map(&:compact)\
|
221
|
+
.flatten.inject(Attributes.new) do |r, x|
|
222
|
+
attr_name, raw_val = x.split(/=/,2)
|
223
|
+
val = attr_name != 'class' ? raw_val[1..-1] : raw_val[1..-1].split
|
224
|
+
r.merge(attr_name.to_sym => val)
|
254
225
|
end
|
255
226
|
|
256
|
-
|
227
|
+
return r
|
257
228
|
end
|
258
229
|
|
259
|
-
def
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
#puts 'tag: ' + tag.inspect
|
266
|
-
istart = s =~ /#{tag[3]}/
|
267
|
-
return s unless istart
|
268
|
-
|
269
|
-
iend = s =~ /#{tag[1]}/
|
270
|
-
comment ="<_%s>%s</_%s>" % [tag[2], s[istart+tag[0].length.. iend-1].gsub('&','&').gsub('<','<').gsub('>','>'), tag[2]]
|
271
|
-
|
272
|
-
if @debug then
|
273
|
-
puts 'comment: ' + comment.inspect
|
274
|
-
# construct the new string
|
275
|
-
puts 'istart: ' + istart.inspect
|
276
|
-
end
|
277
|
-
|
278
|
-
s3 = s[0,istart].to_s + comment + s[iend+3..-1]
|
279
|
-
scancom(s3, type)
|
230
|
+
def reverse(raw_obj)
|
231
|
+
|
232
|
+
return unless raw_obj
|
233
|
+
obj = raw_obj.clone
|
234
|
+
return obj.reverse! if obj.is_a? String
|
280
235
|
|
281
|
-
|
236
|
+
tag = obj.pop.reverse
|
237
|
+
|
238
|
+
children = obj[-1]
|
282
239
|
|
240
|
+
r = children.reverse.map {|x| reverse(x)}
|
241
|
+
|
242
|
+
return [tag[/[!\-\w:\[]+/], get_attributes(tag), *r]
|
243
|
+
end
|
283
244
|
end
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexleparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -64,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
64
64
|
- !ruby/object:Gem::Version
|
65
65
|
version: '0'
|
66
66
|
requirements: []
|
67
|
-
rubygems_version: 3.4.
|
67
|
+
rubygems_version: 3.4.4
|
68
68
|
signing_key:
|
69
69
|
specification_version: 4
|
70
70
|
summary: Rexleparser is an XML parser used by the Rexle gem
|
metadata.gz.sig
CHANGED
Binary file
|