rexleparser 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/rexleparser.rb +127 -166
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a98589dfd93cc2c5a4946f9dc5a2bc5418bd30cbfc4662330d7698bf13b1e107
|
4
|
+
data.tar.gz: 92a35568634aa47b9032aeb26cc193830adee7ba0c3d48dc2a53458119671791
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1132a312974ad5eaef5b1267bf18f35e0228138a3247a867e6e9de4f366f8c654b6f24220e68922cca90698520c07bf45cdc52d44390fedf50e8c97df3ac0666
|
7
|
+
data.tar.gz: 211dddd0cd0411e4311541dd88e8742dbb1f32a2d5cdd6a25b48d90ef25ffd4a4bab7525180c5bdfe7a0b8afb1610e4b532e2fefec4980acae2d22dc160b0602
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/rexleparser.rb
CHANGED
@@ -62,222 +62,183 @@ class Attributes < Hash
|
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
|
-
|
66
65
|
class RexleParserException < Exception
|
67
66
|
end
|
68
67
|
|
69
68
|
class RexleParser
|
70
69
|
|
71
|
-
attr_reader :
|
70
|
+
attr_reader :instructions, :doctype, :to_a
|
72
71
|
|
73
|
-
def initialize(
|
72
|
+
def initialize(raw_s)
|
74
73
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
@stack = []
|
74
|
+
super()
|
75
|
+
s = raw_s.clone.strip
|
76
|
+
return if s.empty?
|
79
77
|
|
80
78
|
raw_xml, raw_instrctns = if s.lines.first =~ /<?xml/ then
|
81
|
-
s.split(/(
|
79
|
+
s.split(/(?=\?>\s*<\w)/,2).reverse
|
82
80
|
else
|
83
81
|
s
|
84
82
|
end
|
85
|
-
puts 'raw_xml: ' + raw_xml.inspect if @debug
|
86
83
|
@instructions = raw_instrctns ? \
|
87
84
|
raw_instrctns.scan(/<\?([\w-]+) ([^\?]+)/) : []
|
88
85
|
@doctype = s.slice!(/<!DOCTYPE html>\n?/) if s.lines.first =~ /<\!DOCTYPE/
|
89
|
-
|
90
|
-
# scancom is run twice because we 1st check for comment tags and then cdata tags
|
91
|
-
@a = parse(scancom(scancom(raw_xml), type: :cdata)).flatten(1)
|
92
|
-
|
93
|
-
end
|
86
|
+
@to_a = reverse(parse_node(raw_xml.strip.reverse))
|
94
87
|
|
95
|
-
def to_a()
|
96
|
-
@a
|
97
88
|
end
|
98
|
-
|
89
|
+
|
99
90
|
private
|
91
|
+
|
100
92
|
|
101
|
-
def
|
93
|
+
def scan_next(r, tagname)
|
102
94
|
|
103
|
-
|
104
|
-
puts '_s: ' + s.inspect if @debug
|
105
|
-
# fetch the element head
|
106
|
-
tag = s =~ /<[^>]+\/?>/
|
107
|
-
s2 = s[tag+1..-1]
|
108
|
-
tagb = s2 =~ />/
|
109
|
-
return unless tag
|
95
|
+
j = tagname
|
110
96
|
|
111
|
-
|
112
|
-
|
113
|
-
if @debug then
|
114
|
-
puts 'ehead()/detail: ' + [tag, tagb, len, s[tag,len+1]].inspect
|
115
|
-
end
|
97
|
+
if r[0] == '>' then
|
116
98
|
|
117
|
-
|
99
|
+
# end tag match
|
100
|
+
tag = r[/^>[^<]+</]
|
101
|
+
|
102
|
+
if tag[1][/[ \w"']/] and tag[-2] != '/' then
|
118
103
|
|
119
|
-
|
104
|
+
# is it the end tag to match the start tag?
|
105
|
+
tag = r.slice!(/^>[^<]+</)
|
106
|
+
end_tag = tag[/^>[^>]*#{j}<$/]
|
120
107
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
108
|
+
if end_tag then
|
109
|
+
|
110
|
+
j = nil
|
111
|
+
return [:end_tag, end_tag]
|
112
|
+
|
113
|
+
elsif tag[/^>[^>]*\w+<$/] then
|
114
|
+
# broken tag found
|
115
|
+
broken_tag = tag
|
116
|
+
return [:child, [nil, [], broken_tag]] if broken_tag
|
117
|
+
else
|
118
|
+
|
119
|
+
text, newtag = tag.sub('>',';tg&').split(/>/,2)
|
120
|
+
|
121
|
+
if newtag then
|
122
|
+
tag = newtag
|
123
|
+
r.prepend '>' + tag
|
124
|
+
end
|
125
|
+
|
126
|
+
return [:child, text]
|
127
|
+
end
|
128
|
+
elsif r[0,3] == '>--' then # comment tag found
|
129
|
+
|
130
|
+
r.slice!(0,3)
|
131
|
+
i = r =~ /(\-\-!<)/
|
132
|
+
s = r.slice!(0,i)
|
133
|
+
r.slice!(0,4)
|
132
134
|
|
133
|
-
|
134
|
-
|
135
|
+
tagname, content = ['-!',s]
|
136
|
+
|
137
|
+
return [:child, [">#{tagname}<", [content], ">#{tagname}/<"]]
|
138
|
+
|
139
|
+
elsif r[0,3] == '>]]' then # CDATA tag found
|
135
140
|
|
136
|
-
|
141
|
+
r.slice!(0,3)
|
142
|
+
i = r =~ /(\[ATADC\[!<)/
|
143
|
+
s = r.slice!(0,i)
|
144
|
+
r.slice!(0,9)
|
137
145
|
|
138
|
-
|
139
|
-
|
140
|
-
if @debug then
|
141
|
-
puts '.parse() s: ' + s.inspect[0..600]
|
142
|
-
puts '.parse() a: ' + a.inspect[0..699]
|
143
|
-
puts '.parse() cur: ' + cur.inspect[0..799]
|
144
|
-
end
|
146
|
+
tagname, content = ['[!',s]
|
145
147
|
|
146
|
-
|
147
|
-
|
148
|
+
return [:child, [">#{tagname}<", [content], ">#{tagname}/<"]]
|
149
|
+
|
150
|
+
elsif tag[/>\/|\/<$/] or tag[/^>.*[\w!]+\/<$/] then
|
151
|
+
|
152
|
+
return [:newnode]
|
153
|
+
|
154
|
+
else
|
155
|
+
|
156
|
+
r.sub!('>',';tg&')
|
157
|
+
i = r =~ />(?:[\-\/"'\w]|\]\])/ # collect until a tag is found or a CDATA element
|
158
|
+
text = r.slice!(0,i)
|
148
159
|
|
149
|
-
|
150
|
-
puts 'parse() self-closing/tag: ' + tag.inspect if @debug
|
151
|
-
tail = $'
|
152
|
-
|
153
|
-
if @debug then
|
154
|
-
puts 'parse() self-closing tag found'
|
155
|
-
puts 'parse()/tail: ' + tail.inspect
|
156
|
-
end
|
157
|
-
|
158
|
-
a2 = parsetag(tag)
|
159
|
-
puts '_a: ' + a.inspect if @debug
|
160
|
-
cur ? a.last << a2 : a << a2
|
161
|
-
|
162
|
-
parse(tail, a, cur)
|
160
|
+
return [:child, text] if text
|
163
161
|
|
164
|
-
|
165
|
-
elsif (s =~ /^<[^\/>]+>/) == 0 then
|
162
|
+
end # end of tag match
|
166
163
|
|
167
|
-
|
164
|
+
else
|
168
165
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
puts 'parse() tag: ' + tag.inspect
|
173
|
-
puts 'parse() tail: ' + tail.inspect
|
174
|
-
end
|
175
|
-
# add it to the list
|
176
|
-
a2 = parsetag(tag)
|
166
|
+
# it's a text value
|
167
|
+
i = r =~ />(?:[\-\/"'\w]|\]\])/ # collect until a tag is found or a CDATA element
|
168
|
+
text = r.slice!(0,i)
|
177
169
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
170
|
+
return [:child, text] if text
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
def parse_node(r, j=nil)
|
175
|
+
|
176
|
+
return unless r.length > 0
|
177
|
+
tag = r.slice!(/^>[^<]+</) if (r =~ /^>[^<]+</) == 0
|
178
|
+
tagname = tag[/([\w!:]+)\/?<$/,1]
|
186
179
|
|
187
|
-
|
180
|
+
# self closing tag?
|
181
|
+
if tag[/^>\/.*#{tagname}<$/m] then
|
182
|
+
return [">/#{tagname}<", [], "#{tag.sub(/>\//,'>')}"]
|
183
|
+
end
|
188
184
|
|
189
|
-
|
190
|
-
@stack.push cur2
|
185
|
+
start_tag, children, end_tag = tag, [], nil
|
191
186
|
|
192
|
-
|
193
|
-
|
194
|
-
|
187
|
+
unless start_tag[1..-3][/\w+$/] then
|
188
|
+
raise RexleParserException, 'invalid closing tag found ' + \
|
189
|
+
start_tag.reverse + '; context: ' + r[0..120].reverse.inspect
|
190
|
+
end
|
195
191
|
|
196
|
-
|
197
|
-
text = raws[/[^<]+/m] #
|
198
|
-
remaining = $'
|
192
|
+
until end_tag do
|
199
193
|
|
200
|
-
|
201
|
-
puts 'parse() text: ' + text.inspect
|
202
|
-
puts 'cur tag: ' + cur[0].inspect
|
203
|
-
end
|
194
|
+
key, res = scan_next r, tagname
|
204
195
|
|
205
|
-
|
206
|
-
|
196
|
+
case key
|
197
|
+
when :end_tag
|
198
|
+
end_tag = res
|
199
|
+
r2 = [start_tag, children, end_tag]
|
200
|
+
end_tag = nil
|
201
|
+
|
202
|
+
return r2
|
203
|
+
when :child
|
204
|
+
children << res
|
205
|
+
when :newnode
|
206
|
+
children << parse_node(r, tagname)
|
207
207
|
else
|
208
|
-
|
208
|
+
break
|
209
209
|
end
|
210
|
-
|
211
|
-
puts 'remaining: ' + remaining.inspect if @debug
|
212
|
-
parse(remaining, a, cur) if remaining.length > 0
|
213
|
-
|
214
|
-
|
215
|
-
# is it a closing tag?
|
216
|
-
elsif s =~ /^\s?<\/\w+>/m
|
217
|
-
|
218
|
-
tail = s[/^\s*<\/\w+>(.*)/m,1]
|
219
|
-
|
220
|
-
if @debug then
|
221
|
-
puts 'parse()/closing tag ' + s[/^\s*<\/\w+>/].inspect
|
222
|
-
puts '>a: ' + a.inspect
|
223
|
-
end
|
224
|
-
|
225
|
-
@stack.pop
|
226
|
-
#a << []
|
227
|
-
parse(tail, a, @stack.last)
|
228
|
-
|
229
|
-
elsif s.empty? and @stack.length > 0
|
230
|
-
|
231
|
-
puts 'parse() no end tag!' if @debug
|
232
|
-
|
233
210
|
end
|
234
211
|
|
235
|
-
|
236
|
-
|
212
|
+
[start_tag, children, end_tag]
|
237
213
|
end
|
238
214
|
|
239
|
-
|
240
|
-
#
|
241
|
-
def parsetag(s)
|
215
|
+
def get_attributes(raw_attributes)
|
242
216
|
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
'
|
250
|
-
|
251
|
-
'!['
|
252
|
-
else
|
253
|
-
rawtagname
|
217
|
+
r1 = /([\w\-:\(\)]+\='[^']*)'/
|
218
|
+
r2 = /([\w\-:\(\)]+\="[^"]*)"/
|
219
|
+
|
220
|
+
r = raw_attributes.scan(/#{r1}|#{r2}/).map(&:compact)\
|
221
|
+
.flatten.inject(Attributes.new) do |r, x|
|
222
|
+
attr_name, raw_val = x.split(/=/,2)
|
223
|
+
val = attr_name != 'class' ? raw_val[1..-1] : raw_val[1..-1].split
|
224
|
+
r.merge(attr_name.to_sym => val)
|
254
225
|
end
|
255
226
|
|
256
|
-
|
227
|
+
return r
|
257
228
|
end
|
258
229
|
|
259
|
-
def
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
#puts 'tag: ' + tag.inspect
|
266
|
-
istart = s =~ /#{tag[3]}/
|
267
|
-
return s unless istart
|
268
|
-
|
269
|
-
iend = s =~ /#{tag[1]}/
|
270
|
-
comment ="<_%s>%s</_%s>" % [tag[2], s[istart+tag[0].length.. iend-1].gsub('&','&').gsub('<','<').gsub('>','>'), tag[2]]
|
271
|
-
|
272
|
-
if @debug then
|
273
|
-
puts 'comment: ' + comment.inspect
|
274
|
-
# construct the new string
|
275
|
-
puts 'istart: ' + istart.inspect
|
276
|
-
end
|
277
|
-
|
278
|
-
s3 = s[0,istart].to_s + comment + s[iend+3..-1]
|
279
|
-
scancom(s3, type)
|
230
|
+
def reverse(raw_obj)
|
231
|
+
|
232
|
+
return unless raw_obj
|
233
|
+
obj = raw_obj.clone
|
234
|
+
return obj.reverse! if obj.is_a? String
|
280
235
|
|
281
|
-
|
236
|
+
tag = obj.pop.reverse
|
237
|
+
|
238
|
+
children = obj[-1]
|
282
239
|
|
240
|
+
r = children.reverse.map {|x| reverse(x)}
|
241
|
+
|
242
|
+
return [tag[/[!\-\w:\[]+/], get_attributes(tag), *r]
|
243
|
+
end
|
283
244
|
end
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexleparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -64,7 +64,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
64
64
|
- !ruby/object:Gem::Version
|
65
65
|
version: '0'
|
66
66
|
requirements: []
|
67
|
-
rubygems_version: 3.4.
|
67
|
+
rubygems_version: 3.4.4
|
68
68
|
signing_key:
|
69
69
|
specification_version: 4
|
70
70
|
summary: Rexleparser is an XML parser used by the Rexle gem
|
metadata.gz.sig
CHANGED
Binary file
|