rexleparser 0.4.19 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/lib/rexleparser.rb +91 -113
- data.tar.gz.sig +0 -0
- metadata +2 -2
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4fd9dcc1abbf7f2b61bd3d2f86d4e304d14b7356
|
4
|
+
data.tar.gz: ded2e477c2d4f6a634d33fe89a598946366ee9ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5bf24ad6b143d2e89cf8b25b2352b89df9246ed97c59cb48e14b0231348a7750619e9fe3cfc0894220c4f1a980237be3aa5635f704ff84bafb368df102fead72
|
7
|
+
data.tar.gz: 70209b2013bafa24af4667d76647e413caed13b8c66b0fdd8d27ea71d032328ad850e44ac027493bc26029d9602d5a3fdfc2d0082ed3215cdba5e54459281eb5
|
checksums.yaml.gz.sig
CHANGED
Binary file
|
data/lib/rexleparser.rb
CHANGED
@@ -5,154 +5,109 @@
|
|
5
5
|
|
6
6
|
class RexleParser
|
7
7
|
|
8
|
-
attr_reader :instructions, :doctype
|
8
|
+
attr_reader :instructions, :doctype, :to_a
|
9
9
|
|
10
10
|
def initialize(raw_s)
|
11
|
+
|
11
12
|
super()
|
12
13
|
s = raw_s.clone
|
13
14
|
@instructions = s.scan(/<\?([\w-]+) ([^>]+)\?>/)
|
14
15
|
@doctype = s.slice!(/<!DOCTYPE html>\n?/)
|
15
|
-
@a = scan_element(s.strip.gsub(/<\?[^>]+>/,'').split(//))
|
16
|
-
end
|
17
|
-
|
18
|
-
def to_a() @a end
|
19
16
|
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
s2 = s.gsub('<![CDATA[','<!cdata>').gsub(']]>','</!cdata>')
|
18
|
+
@to_a = reverse(parse(s2.strip.gsub(/<\?[^>]+>/,'').reverse))
|
19
|
+
|
23
20
|
end
|
24
21
|
|
22
|
+
|
25
23
|
private
|
26
24
|
|
27
|
-
def scan_a(a)
|
28
|
-
a.inject([]) do |r, x|
|
29
|
-
name, value, attributes, *remaining = x
|
30
|
-
text_remaining = scan_a remaining if remaining
|
31
|
-
r << value.strip << text_remaining if value
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def scan_element(a)
|
36
|
-
|
37
|
-
a.shift until a[0] == '<' and a[1] != '/' or a.length < 1
|
38
|
-
|
39
|
-
return unless a.length > 1
|
40
25
|
|
41
|
-
|
26
|
+
def scan_next(r, tagname)
|
42
27
|
|
43
|
-
|
44
|
-
if a[0..1].join == '![' then
|
28
|
+
j = tagname
|
45
29
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
value << a.shift until a[0..2].join == ']]>' or a.length <= 1
|
51
|
-
a.slice!(0,3)
|
52
|
-
return [name, value, {}]
|
53
|
-
elsif a[0..2].join == '!--' then
|
54
|
-
name = '!-'
|
55
|
-
#<![CDATA[
|
56
|
-
#<!--
|
57
|
-
3.times{ a.shift }
|
58
|
-
value = ''
|
59
|
-
|
60
|
-
value << a.shift until a[0..2].join == '-->' or a.length <= 1
|
61
|
-
a.slice!(0,3)
|
62
|
-
return [name, value, {}]
|
63
|
-
else
|
30
|
+
if (r =~ /^>/) == 0 then
|
31
|
+
|
32
|
+
# end tag match
|
33
|
+
tag = r[/^>[^<]+</]
|
64
34
|
|
65
|
-
|
66
|
-
name << a.shift
|
67
|
-
name << a.shift while a[0] != ' ' and a[0] != '>' and a[0] != '/'
|
35
|
+
if tag[/^>.*[^\/]<$/] then
|
68
36
|
|
69
|
-
|
37
|
+
# is it the end tag to match the start tag?
|
38
|
+
tag = r.slice!(/^>[^<]+</)
|
39
|
+
end_tag = tag[/^>[^>]*#{j}<$/]
|
70
40
|
|
71
|
-
|
72
|
-
|
73
|
-
|
41
|
+
if end_tag then
|
42
|
+
|
43
|
+
j = nil
|
44
|
+
return [:end_tag, end_tag]
|
45
|
+
|
46
|
+
elsif tag[/^>[^>]*\w+<$/] then
|
47
|
+
|
48
|
+
# broken tag found
|
49
|
+
broken_tag = tag
|
50
|
+
return [:child, [nil, [], broken_tag]] if broken_tag
|
51
|
+
else
|
74
52
|
|
75
|
-
|
76
|
-
|
53
|
+
text, tag = tag.sub('>',';tg&').split(/>/,2)
|
54
|
+
r.prepend '>' + tag
|
55
|
+
return [:child, text]
|
56
|
+
end
|
77
57
|
|
78
|
-
|
79
|
-
a.shift(2)
|
58
|
+
else
|
80
59
|
|
81
|
-
|
82
|
-
|
83
|
-
#a.shift until a[0] == '<' or a.length < 1
|
84
|
-
raw_values.strip!
|
60
|
+
# it's a start tag?
|
61
|
+
return [:newnode] if tag[/^>.*[\w!]+\/<$/]
|
85
62
|
|
86
|
-
|
87
|
-
element = [name, nil, attributes]
|
63
|
+
end # end of tag match
|
88
64
|
|
89
|
-
|
90
|
-
return [element, after_text.join]
|
65
|
+
else
|
91
66
|
|
92
|
-
|
67
|
+
# it's a text value
|
68
|
+
text = r.slice!(/[^>]+/)
|
69
|
+
return [:child, text] if text
|
70
|
+
end
|
71
|
+
end
|
93
72
|
|
94
|
-
|
73
|
+
def parse(r, j=nil)
|
95
74
|
|
96
|
-
|
97
|
-
value, attributes = get_value_and_attribs(raw_values)
|
98
|
-
end
|
75
|
+
tag = r.slice!(/^>[^<]+</) if (r =~ /^>[^<]+</) == 0
|
99
76
|
|
100
|
-
|
101
|
-
tag = a[0, name.length + 3].join
|
102
|
-
|
103
|
-
return unless a.length > 0
|
104
|
-
|
105
|
-
children = tag == ("</%s>" % name) ? false : true
|
106
|
-
|
107
|
-
if children == true then
|
77
|
+
if tag[0,3] == '>--' then
|
108
78
|
|
109
|
-
|
110
|
-
|
79
|
+
i = r =~ /<--/
|
80
|
+
tag += r.slice!(0,i+5)
|
81
|
+
# it's a comment tag
|
82
|
+
tagname = '-!'
|
83
|
+
return [">#{tagname}<", [tag[/>--(.*)--!</,1]], ">#{tagname}/<"]
|
84
|
+
end
|
85
|
+
|
86
|
+
tagname = tag[/([\w!]+)\/?<$/,1]
|
111
87
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
return after_text.length >= 1 ? [element, after_text.join] : element
|
88
|
+
# self closing tag?
|
89
|
+
if tag[/^>\/.*#{tagname}<$/] then
|
90
|
+
return [">/#{tagname}<", [], "#{tag.sub(/>\//,'>')}"]
|
91
|
+
end
|
118
92
|
|
119
|
-
|
93
|
+
start_tag, children, end_tag = tag, [], nil
|
120
94
|
|
121
|
-
|
122
|
-
a.slice!(0, name.length + 3) if a[0, name.length + 3].join \
|
123
|
-
== "</%s>" % name
|
124
|
-
after_text = []
|
125
|
-
after_text << a.shift until a[0] == '<' or a.length <= 1
|
95
|
+
until end_tag do
|
126
96
|
|
127
|
-
|
97
|
+
key, res = scan_next r, tagname
|
128
98
|
|
129
|
-
|
99
|
+
case key
|
100
|
+
when :end_tag
|
101
|
+
end_tag = res
|
102
|
+
return [start_tag, children, end_tag]
|
103
|
+
when :child
|
104
|
+
children << res
|
105
|
+
when :newnode
|
106
|
+
children << parse(r, tagname)
|
130
107
|
end
|
131
108
|
end
|
132
109
|
|
133
|
-
|
134
|
-
|
135
|
-
def scan_elements(a, element)
|
136
|
-
r = scan_element(a)
|
137
|
-
|
138
|
-
if r and r[0].is_a?(Array) then
|
139
|
-
element = r.inject(element) {|r,x| r << x} if r
|
140
|
-
elsif r
|
141
|
-
element << r
|
142
|
-
end
|
143
|
-
return a
|
144
|
-
end
|
145
|
-
|
146
|
-
def get_value_and_attribs(raw_values)
|
147
|
-
|
148
|
-
match_found = raw_values.match(/([^>]*)>(.*)/m)
|
149
|
-
|
150
|
-
if match_found then
|
151
|
-
raw_attributes, value = match_found.captures
|
152
|
-
attributes = get_attributes(raw_attributes)
|
153
|
-
end
|
154
|
-
|
155
|
-
[value.gsub('&gt;','>').gsub('&lt;','<'), attributes]
|
110
|
+
[start_tag, children, end_tag]
|
156
111
|
end
|
157
112
|
|
158
113
|
def get_attributes(raw_attributes)
|
@@ -169,4 +124,27 @@ class RexleParser
|
|
169
124
|
|
170
125
|
return r
|
171
126
|
end
|
127
|
+
|
128
|
+
def reverse(raw_obj)
|
129
|
+
|
130
|
+
obj = raw_obj.clone
|
131
|
+
return obj.reverse! if obj.is_a? String
|
132
|
+
|
133
|
+
tag = obj.pop.reverse.sub('!cdata','!-')
|
134
|
+
|
135
|
+
children = obj[-1]
|
136
|
+
|
137
|
+
if children.last.is_a?(String) then
|
138
|
+
ltext ||= ''
|
139
|
+
ltext << children.pop.reverse
|
140
|
+
end
|
141
|
+
|
142
|
+
ltext << children.pop.reverse if children.last.is_a?(String)
|
143
|
+
|
144
|
+
r = children.reverse.map do |x|
|
145
|
+
reverse(x)
|
146
|
+
end
|
147
|
+
|
148
|
+
return [tag[/[!\-\w\[]+/], ltext, get_attributes(tag), *r]
|
149
|
+
end
|
172
150
|
end
|
data.tar.gz.sig
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rexleparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.19
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Robertson
|
@@ -31,7 +31,7 @@ cert_chain:
|
|
31
31
|
CkjTMLaPwIQI6dsbG4bVJ7/XzL7c8niqJSF7M0yr1+2kUrWFUZMBMrUUxgZxSkjL
|
32
32
|
Cgd76bp2zjiyCw==
|
33
33
|
-----END CERTIFICATE-----
|
34
|
-
date:
|
34
|
+
date: 2015-01-25 00:00:00.000000000 Z
|
35
35
|
dependencies: []
|
36
36
|
description:
|
37
37
|
email: james@r0bertson.co.uk
|
metadata.gz.sig
CHANGED
Binary file
|