rexleparser 0.4.19 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9cef0695d2e3117b683c543e54f66740d174f9e9
4
- data.tar.gz: 5faa7ee7f21ec164bfa8e49f88ca9fad4ad7c415
3
+ metadata.gz: 4fd9dcc1abbf7f2b61bd3d2f86d4e304d14b7356
4
+ data.tar.gz: ded2e477c2d4f6a634d33fe89a598946366ee9ee
5
5
  SHA512:
6
- metadata.gz: 06f2ad8866f62ddac3dc4c2db0b4d19b89a607f170645460d34ca3bab4e2cc206555fab7843f6b8a92a0d8239e156e4d12e4bdb2cc2e9fc9b6fb4ca93539397a
7
- data.tar.gz: 2b5772c4220d2b8cb0a6cc52a10ded34a6726b25cf316429021232ff5fa863d43d478458622d2f3b42878f507cffb49a3d09c984e5d39c9bb3c8780d5a2b5ee0
6
+ metadata.gz: 5bf24ad6b143d2e89cf8b25b2352b89df9246ed97c59cb48e14b0231348a7750619e9fe3cfc0894220c4f1a980237be3aa5635f704ff84bafb368df102fead72
7
+ data.tar.gz: 70209b2013bafa24af4667d76647e413caed13b8c66b0fdd8d27ea71d032328ad850e44ac027493bc26029d9602d5a3fdfc2d0082ed3215cdba5e54459281eb5
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/rexleparser.rb CHANGED
@@ -5,154 +5,109 @@
5
5
 
6
6
  class RexleParser
7
7
 
8
- attr_reader :instructions, :doctype
8
+ attr_reader :instructions, :doctype, :to_a
9
9
 
10
10
  def initialize(raw_s)
11
+
11
12
  super()
12
13
  s = raw_s.clone
13
14
  @instructions = s.scan(/<\?([\w-]+) ([^>]+)\?>/)
14
15
  @doctype = s.slice!(/<!DOCTYPE html>\n?/)
15
- @a = scan_element(s.strip.gsub(/<\?[^>]+>/,'').split(//))
16
- end
17
-
18
- def to_a() @a end
19
16
 
20
- def to_s()
21
- name, value, attributes, *remaining = @a
22
- [value.strip, scan_a(remaining)].flatten.join(' ')
17
+ s2 = s.gsub('<![CDATA[','<!cdata>').gsub(']]>','</!cdata>')
18
+ @to_a = reverse(parse(s2.strip.gsub(/<\?[^>]+>/,'').reverse))
19
+
23
20
  end
24
21
 
22
+
25
23
  private
26
24
 
27
- def scan_a(a)
28
- a.inject([]) do |r, x|
29
- name, value, attributes, *remaining = x
30
- text_remaining = scan_a remaining if remaining
31
- r << value.strip << text_remaining if value
32
- end
33
- end
34
-
35
- def scan_element(a)
36
-
37
- a.shift until a[0] == '<' and a[1] != '/' or a.length < 1
38
-
39
- return unless a.length > 1
40
25
 
41
- a.shift
26
+ def scan_next(r, tagname)
42
27
 
43
- # CDATA ?
44
- if a[0..1].join == '![' then
28
+ j = tagname
45
29
 
46
- name = '!['
47
- 8.times{ a.shift }
48
- value = ''
49
-
50
- value << a.shift until a[0..2].join == ']]>' or a.length <= 1
51
- a.slice!(0,3)
52
- return [name, value, {}]
53
- elsif a[0..2].join == '!--' then
54
- name = '!-'
55
- #<![CDATA[
56
- #<!--
57
- 3.times{ a.shift }
58
- value = ''
59
-
60
- value << a.shift until a[0..2].join == '-->' or a.length <= 1
61
- a.slice!(0,3)
62
- return [name, value, {}]
63
- else
30
+ if (r =~ /^>/) == 0 then
31
+
32
+ # end tag match
33
+ tag = r[/^>[^<]+</]
64
34
 
65
- name = ''
66
- name << a.shift
67
- name << a.shift while a[0] != ' ' and a[0] != '>' and a[0] != '/'
35
+ if tag[/^>.*[^\/]<$/] then
68
36
 
69
- return unless name
37
+ # is it the end tag to match the start tag?
38
+ tag = r.slice!(/^>[^<]+</)
39
+ end_tag = tag[/^>[^>]*#{j}<$/]
70
40
 
71
- # find the closing tag
72
- i = a.index('>')
73
- raw_values = ''
41
+ if end_tag then
42
+
43
+ j = nil
44
+ return [:end_tag, end_tag]
45
+
46
+ elsif tag[/^>[^>]*\w+<$/] then
47
+
48
+ # broken tag found
49
+ broken_tag = tag
50
+ return [:child, [nil, [], broken_tag]] if broken_tag
51
+ else
74
52
 
75
- # is it a self closing tag?
76
- if a[i-1] == '/' then
53
+ text, tag = tag.sub('>',';tg&').split(/>/,2)
54
+ r.prepend '>' + tag
55
+ return [:child, text]
56
+ end
77
57
 
78
- raw_values << a.shift until (a[0] + a[1..-1].join.strip[0]) == '/>'
79
- a.shift(2)
58
+ else
80
59
 
81
- after_text = []
82
- after_text << a.shift until a[0] == '<' or a.length <= 1
83
- #a.shift until a[0] == '<' or a.length < 1
84
- raw_values.strip!
60
+ # it's a start tag?
61
+ return [:newnode] if tag[/^>.*[\w!]+\/<$/]
85
62
 
86
- attributes = raw_values.length > 0 ? get_attributes(raw_values) : {}
87
- element = [name, nil, attributes]
63
+ end # end of tag match
88
64
 
89
- return element if after_text.empty?
90
- return [element, after_text.join]
65
+ else
91
66
 
92
- else
67
+ # it's a text value
68
+ text = r.slice!(/[^>]+/)
69
+ return [:child, text] if text
70
+ end
71
+ end
93
72
 
94
- raw_values << a.shift until a[0] == '<'
73
+ def parse(r, j=nil)
95
74
 
96
- if raw_values[1..-1].length > 0 then
97
- value, attributes = get_value_and_attribs(raw_values)
98
- end
75
+ tag = r.slice!(/^>[^<]+</) if (r =~ /^>[^<]+</) == 0
99
76
 
100
- element = [name, value, attributes || {}]
101
- tag = a[0, name.length + 3].join
102
-
103
- return unless a.length > 0
104
-
105
- children = tag == ("</%s>" % name) ? false : true
106
-
107
- if children == true then
77
+ if tag[0,3] == '>--' then
108
78
 
109
- xa = scan_elements(a, element) until (a[0, name.length + 3].join \
110
- == "</%s>" % [name]) or a.length < 2
79
+ i = r =~ /<--/
80
+ tag += r.slice!(0,i+5)
81
+ # it's a comment tag
82
+ tagname = '-!'
83
+ return [">#{tagname}<", [tag[/>--(.*)--!</,1]], ">#{tagname}/<"]
84
+ end
85
+
86
+ tagname = tag[/([\w!]+)\/?<$/,1]
111
87
 
112
- xa.shift until xa[0] == '>' or xa.length <= 1
113
- xa.shift
114
- after_text = []
115
- after_text << xa.shift until xa[0] == '<' or xa.length <= 1
116
-
117
- return after_text.length >= 1 ? [element, after_text.join] : element
88
+ # self closing tag?
89
+ if tag[/^>\/.*#{tagname}<$/] then
90
+ return [">/#{tagname}<", [], "#{tag.sub(/>\//,'>')}"]
91
+ end
118
92
 
119
- else
93
+ start_tag, children, end_tag = tag, [], nil
120
94
 
121
- #check for its end tag
122
- a.slice!(0, name.length + 3) if a[0, name.length + 3].join \
123
- == "</%s>" % name
124
- after_text = []
125
- after_text << a.shift until a[0] == '<' or a.length <= 1
95
+ until end_tag do
126
96
 
127
- return after_text.length >= 1 ? [element, after_text.join] : element
97
+ key, res = scan_next r, tagname
128
98
 
129
- end
99
+ case key
100
+ when :end_tag
101
+ end_tag = res
102
+ return [start_tag, children, end_tag]
103
+ when :child
104
+ children << res
105
+ when :newnode
106
+ children << parse(r, tagname)
130
107
  end
131
108
  end
132
109
 
133
- end
134
-
135
- def scan_elements(a, element)
136
- r = scan_element(a)
137
-
138
- if r and r[0].is_a?(Array) then
139
- element = r.inject(element) {|r,x| r << x} if r
140
- elsif r
141
- element << r
142
- end
143
- return a
144
- end
145
-
146
- def get_value_and_attribs(raw_values)
147
-
148
- match_found = raw_values.match(/([^>]*)>(.*)/m)
149
-
150
- if match_found then
151
- raw_attributes, value = match_found.captures
152
- attributes = get_attributes(raw_attributes)
153
- end
154
-
155
- [value.gsub('>','&gt;').gsub('<','&lt;'), attributes]
110
+ [start_tag, children, end_tag]
156
111
  end
157
112
 
158
113
  def get_attributes(raw_attributes)
@@ -169,4 +124,27 @@ class RexleParser
169
124
 
170
125
  return r
171
126
  end
127
+
128
+ def reverse(raw_obj)
129
+
130
+ obj = raw_obj.clone
131
+ return obj.reverse! if obj.is_a? String
132
+
133
+ tag = obj.pop.reverse.sub('!cdata','!-')
134
+
135
+ children = obj[-1]
136
+
137
+ if children.last.is_a?(String) then
138
+ ltext ||= ''
139
+ ltext << children.pop.reverse
140
+ end
141
+
142
+ ltext << children.pop.reverse if children.last.is_a?(String)
143
+
144
+ r = children.reverse.map do |x|
145
+ reverse(x)
146
+ end
147
+
148
+ return [tag[/[!\-\w\[]+/], ltext, get_attributes(tag), *r]
149
+ end
172
150
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rexleparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.19
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -31,7 +31,7 @@ cert_chain:
31
31
  CkjTMLaPwIQI6dsbG4bVJ7/XzL7c8niqJSF7M0yr1+2kUrWFUZMBMrUUxgZxSkjL
32
32
  Cgd76bp2zjiyCw==
33
33
  -----END CERTIFICATE-----
34
- date: 2014-11-20 00:00:00.000000000 Z
34
+ date: 2015-01-25 00:00:00.000000000 Z
35
35
  dependencies: []
36
36
  description:
37
37
  email: james@r0bertson.co.uk
metadata.gz.sig CHANGED
Binary file