rexleparser 0.4.19 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9cef0695d2e3117b683c543e54f66740d174f9e9
4
- data.tar.gz: 5faa7ee7f21ec164bfa8e49f88ca9fad4ad7c415
3
+ metadata.gz: 4fd9dcc1abbf7f2b61bd3d2f86d4e304d14b7356
4
+ data.tar.gz: ded2e477c2d4f6a634d33fe89a598946366ee9ee
5
5
  SHA512:
6
- metadata.gz: 06f2ad8866f62ddac3dc4c2db0b4d19b89a607f170645460d34ca3bab4e2cc206555fab7843f6b8a92a0d8239e156e4d12e4bdb2cc2e9fc9b6fb4ca93539397a
7
- data.tar.gz: 2b5772c4220d2b8cb0a6cc52a10ded34a6726b25cf316429021232ff5fa863d43d478458622d2f3b42878f507cffb49a3d09c984e5d39c9bb3c8780d5a2b5ee0
6
+ metadata.gz: 5bf24ad6b143d2e89cf8b25b2352b89df9246ed97c59cb48e14b0231348a7750619e9fe3cfc0894220c4f1a980237be3aa5635f704ff84bafb368df102fead72
7
+ data.tar.gz: 70209b2013bafa24af4667d76647e413caed13b8c66b0fdd8d27ea71d032328ad850e44ac027493bc26029d9602d5a3fdfc2d0082ed3215cdba5e54459281eb5
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/rexleparser.rb CHANGED
@@ -5,154 +5,109 @@
5
5
 
6
6
  class RexleParser
7
7
 
8
- attr_reader :instructions, :doctype
8
+ attr_reader :instructions, :doctype, :to_a
9
9
 
10
10
  def initialize(raw_s)
11
+
11
12
  super()
12
13
  s = raw_s.clone
13
14
  @instructions = s.scan(/<\?([\w-]+) ([^>]+)\?>/)
14
15
  @doctype = s.slice!(/<!DOCTYPE html>\n?/)
15
- @a = scan_element(s.strip.gsub(/<\?[^>]+>/,'').split(//))
16
- end
17
-
18
- def to_a() @a end
19
16
 
20
- def to_s()
21
- name, value, attributes, *remaining = @a
22
- [value.strip, scan_a(remaining)].flatten.join(' ')
17
+ s2 = s.gsub('<![CDATA[','<!cdata>').gsub(']]>','</!cdata>')
18
+ @to_a = reverse(parse(s2.strip.gsub(/<\?[^>]+>/,'').reverse))
19
+
23
20
  end
24
21
 
22
+
25
23
  private
26
24
 
27
- def scan_a(a)
28
- a.inject([]) do |r, x|
29
- name, value, attributes, *remaining = x
30
- text_remaining = scan_a remaining if remaining
31
- r << value.strip << text_remaining if value
32
- end
33
- end
34
-
35
- def scan_element(a)
36
-
37
- a.shift until a[0] == '<' and a[1] != '/' or a.length < 1
38
-
39
- return unless a.length > 1
40
25
 
41
- a.shift
26
+ def scan_next(r, tagname)
42
27
 
43
- # CDATA ?
44
- if a[0..1].join == '![' then
28
+ j = tagname
45
29
 
46
- name = '!['
47
- 8.times{ a.shift }
48
- value = ''
49
-
50
- value << a.shift until a[0..2].join == ']]>' or a.length <= 1
51
- a.slice!(0,3)
52
- return [name, value, {}]
53
- elsif a[0..2].join == '!--' then
54
- name = '!-'
55
- #<![CDATA[
56
- #<!--
57
- 3.times{ a.shift }
58
- value = ''
59
-
60
- value << a.shift until a[0..2].join == '-->' or a.length <= 1
61
- a.slice!(0,3)
62
- return [name, value, {}]
63
- else
30
+ if (r =~ /^>/) == 0 then
31
+
32
+ # end tag match
33
+ tag = r[/^>[^<]+</]
64
34
 
65
- name = ''
66
- name << a.shift
67
- name << a.shift while a[0] != ' ' and a[0] != '>' and a[0] != '/'
35
+ if tag[/^>.*[^\/]<$/] then
68
36
 
69
- return unless name
37
+ # is it the end tag to match the start tag?
38
+ tag = r.slice!(/^>[^<]+</)
39
+ end_tag = tag[/^>[^>]*#{j}<$/]
70
40
 
71
- # find the closing tag
72
- i = a.index('>')
73
- raw_values = ''
41
+ if end_tag then
42
+
43
+ j = nil
44
+ return [:end_tag, end_tag]
45
+
46
+ elsif tag[/^>[^>]*\w+<$/] then
47
+
48
+ # broken tag found
49
+ broken_tag = tag
50
+ return [:child, [nil, [], broken_tag]] if broken_tag
51
+ else
74
52
 
75
- # is it a self closing tag?
76
- if a[i-1] == '/' then
53
+ text, tag = tag.sub('>',';tg&').split(/>/,2)
54
+ r.prepend '>' + tag
55
+ return [:child, text]
56
+ end
77
57
 
78
- raw_values << a.shift until (a[0] + a[1..-1].join.strip[0]) == '/>'
79
- a.shift(2)
58
+ else
80
59
 
81
- after_text = []
82
- after_text << a.shift until a[0] == '<' or a.length <= 1
83
- #a.shift until a[0] == '<' or a.length < 1
84
- raw_values.strip!
60
+ # it's a start tag?
61
+ return [:newnode] if tag[/^>.*[\w!]+\/<$/]
85
62
 
86
- attributes = raw_values.length > 0 ? get_attributes(raw_values) : {}
87
- element = [name, nil, attributes]
63
+ end # end of tag match
88
64
 
89
- return element if after_text.empty?
90
- return [element, after_text.join]
65
+ else
91
66
 
92
- else
67
+ # it's a text value
68
+ text = r.slice!(/[^>]+/)
69
+ return [:child, text] if text
70
+ end
71
+ end
93
72
 
94
- raw_values << a.shift until a[0] == '<'
73
+ def parse(r, j=nil)
95
74
 
96
- if raw_values[1..-1].length > 0 then
97
- value, attributes = get_value_and_attribs(raw_values)
98
- end
75
+ tag = r.slice!(/^>[^<]+</) if (r =~ /^>[^<]+</) == 0
99
76
 
100
- element = [name, value, attributes || {}]
101
- tag = a[0, name.length + 3].join
102
-
103
- return unless a.length > 0
104
-
105
- children = tag == ("</%s>" % name) ? false : true
106
-
107
- if children == true then
77
+ if tag[0,3] == '>--' then
108
78
 
109
- xa = scan_elements(a, element) until (a[0, name.length + 3].join \
110
- == "</%s>" % [name]) or a.length < 2
79
+ i = r =~ /<--/
80
+ tag += r.slice!(0,i+5)
81
+ # it's a comment tag
82
+ tagname = '-!'
83
+ return [">#{tagname}<", [tag[/>--(.*)--!</,1]], ">#{tagname}/<"]
84
+ end
85
+
86
+ tagname = tag[/([\w!]+)\/?<$/,1]
111
87
 
112
- xa.shift until xa[0] == '>' or xa.length <= 1
113
- xa.shift
114
- after_text = []
115
- after_text << xa.shift until xa[0] == '<' or xa.length <= 1
116
-
117
- return after_text.length >= 1 ? [element, after_text.join] : element
88
+ # self closing tag?
89
+ if tag[/^>\/.*#{tagname}<$/] then
90
+ return [">/#{tagname}<", [], "#{tag.sub(/>\//,'>')}"]
91
+ end
118
92
 
119
- else
93
+ start_tag, children, end_tag = tag, [], nil
120
94
 
121
- #check for its end tag
122
- a.slice!(0, name.length + 3) if a[0, name.length + 3].join \
123
- == "</%s>" % name
124
- after_text = []
125
- after_text << a.shift until a[0] == '<' or a.length <= 1
95
+ until end_tag do
126
96
 
127
- return after_text.length >= 1 ? [element, after_text.join] : element
97
+ key, res = scan_next r, tagname
128
98
 
129
- end
99
+ case key
100
+ when :end_tag
101
+ end_tag = res
102
+ return [start_tag, children, end_tag]
103
+ when :child
104
+ children << res
105
+ when :newnode
106
+ children << parse(r, tagname)
130
107
  end
131
108
  end
132
109
 
133
- end
134
-
135
- def scan_elements(a, element)
136
- r = scan_element(a)
137
-
138
- if r and r[0].is_a?(Array) then
139
- element = r.inject(element) {|r,x| r << x} if r
140
- elsif r
141
- element << r
142
- end
143
- return a
144
- end
145
-
146
- def get_value_and_attribs(raw_values)
147
-
148
- match_found = raw_values.match(/([^>]*)>(.*)/m)
149
-
150
- if match_found then
151
- raw_attributes, value = match_found.captures
152
- attributes = get_attributes(raw_attributes)
153
- end
154
-
155
- [value.gsub('>','&gt;').gsub('<','&lt;'), attributes]
110
+ [start_tag, children, end_tag]
156
111
  end
157
112
 
158
113
  def get_attributes(raw_attributes)
@@ -169,4 +124,27 @@ class RexleParser
169
124
 
170
125
  return r
171
126
  end
127
+
128
+ def reverse(raw_obj)
129
+
130
+ obj = raw_obj.clone
131
+ return obj.reverse! if obj.is_a? String
132
+
133
+ tag = obj.pop.reverse.sub('!cdata','!-')
134
+
135
+ children = obj[-1]
136
+
137
+ if children.last.is_a?(String) then
138
+ ltext ||= ''
139
+ ltext << children.pop.reverse
140
+ end
141
+
142
+ ltext << children.pop.reverse if children.last.is_a?(String)
143
+
144
+ r = children.reverse.map do |x|
145
+ reverse(x)
146
+ end
147
+
148
+ return [tag[/[!\-\w\[]+/], ltext, get_attributes(tag), *r]
149
+ end
172
150
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexleparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.19
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -31,7 +31,7 @@ cert_chain:
31
31
  CkjTMLaPwIQI6dsbG4bVJ7/XzL7c8niqJSF7M0yr1+2kUrWFUZMBMrUUxgZxSkjL
32
32
  Cgd76bp2zjiyCw==
33
33
  -----END CERTIFICATE-----
34
- date: 2014-11-20 00:00:00.000000000 Z
34
+ date: 2015-01-25 00:00:00.000000000 Z
35
35
  dependencies: []
36
36
  description:
37
37
  email: james@r0bertson.co.uk
metadata.gz.sig CHANGED
Binary file