maiku 0.6.1.maiku
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/maruku.rb +141 -0
- data/lib/maruku/attributes.rb +175 -0
- data/lib/maruku/defaults.rb +71 -0
- data/lib/maruku/errors_management.rb +92 -0
- data/lib/maruku/ext/div.rb +133 -0
- data/lib/maruku/ext/math.rb +41 -0
- data/lib/maruku/ext/math/elements.rb +27 -0
- data/lib/maruku/ext/math/latex_fix.rb +12 -0
- data/lib/maruku/ext/math/mathml_engines/blahtex.rb +107 -0
- data/lib/maruku/ext/math/mathml_engines/itex2mml.rb +29 -0
- data/lib/maruku/ext/math/mathml_engines/none.rb +20 -0
- data/lib/maruku/ext/math/mathml_engines/ritex.rb +24 -0
- data/lib/maruku/ext/math/parsing.rb +119 -0
- data/lib/maruku/ext/math/to_html.rb +187 -0
- data/lib/maruku/ext/math/to_latex.rb +26 -0
- data/lib/maruku/helpers.rb +260 -0
- data/lib/maruku/input/charsource.rb +326 -0
- data/lib/maruku/input/extensions.rb +69 -0
- data/lib/maruku/input/html_helper.rb +189 -0
- data/lib/maruku/input/linesource.rb +111 -0
- data/lib/maruku/input/parse_block.rb +616 -0
- data/lib/maruku/input/parse_doc.rb +232 -0
- data/lib/maruku/input/parse_span_better.rb +746 -0
- data/lib/maruku/input/rubypants.rb +225 -0
- data/lib/maruku/input/type_detection.rb +147 -0
- data/lib/maruku/input_textile2/t2_parser.rb +163 -0
- data/lib/maruku/maruku.rb +33 -0
- data/lib/maruku/output/s5/fancy.rb +756 -0
- data/lib/maruku/output/s5/to_s5.rb +138 -0
- data/lib/maruku/output/to_html.rb +991 -0
- data/lib/maruku/output/to_latex.rb +590 -0
- data/lib/maruku/output/to_latex_entities.rb +367 -0
- data/lib/maruku/output/to_latex_strings.rb +64 -0
- data/lib/maruku/output/to_markdown.rb +164 -0
- data/lib/maruku/output/to_s.rb +56 -0
- data/lib/maruku/string_utils.rb +201 -0
- data/lib/maruku/structures.rb +167 -0
- data/lib/maruku/structures_inspect.rb +87 -0
- data/lib/maruku/structures_iterators.rb +61 -0
- data/lib/maruku/textile2.rb +1 -0
- data/lib/maruku/toc.rb +199 -0
- data/lib/maruku/usage/example1.rb +33 -0
- data/lib/maruku/version.rb +39 -0
- metadata +167 -0
@@ -0,0 +1,326 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
3
|
+
#
|
4
|
+
# This file is part of Maruku.
|
5
|
+
#
|
6
|
+
# Maruku is free software; you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation; either version 2 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Maruku is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Maruku; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
+
#++
|
20
|
+
|
21
|
+
|
22
|
+
module MaRuKu; module In; module Markdown; module SpanLevelParser
|
23
|
+
|
24
|
+
# Forward declarations of the three scanner classes, so that the
# CharSource alias can be assigned here before the class bodies are
# given later in this file.

# Hand-coded scanner that indexes directly into the input String.
class CharSourceManual
end

# Scanner implemented as a wrapper around StringScanner.
class CharSourceStrscan
end

# Debugging scanner: checks the correctness of the two scanners above
# by comparing their output.
class CharSourceDebug
end

# Pick the implementation that the rest of the parser sees as CharSource.
# Exactly one of the following assignments should be active.
CharSource = CharSourceManual # faster! 58ms vs. 65ms
#CharSource = CharSourceStrscan
#CharSource = CharSourceDebug
|
39
|
+
|
40
|
+
|
41
|
+
# Hand-written character scanner over an in-memory String.
#
# The whole input is kept in @buffer and the cursor is the integer
# @buffer_index. None of the cursor-moving methods check bounds, so the
# index may run past the end of the buffer; reads then return nil.
class CharSourceManual
  include MaRuKu::Strings
  include SpanLevelParser

  # s      - the String to scan; any other class raises a RuntimeError.
  # parent - optional enclosing scanner; only used by #describe, which
  #          appends the parent's description to its own.
  def initialize(s, parent=nil)
    raise "Passed #{s.class}" if not s.kind_of? String
    @buffer = s
    @buffer_index = 0
    @parent = parent
  end

  # Return the char under the cursor, or nil past the end of the buffer.
  # This is String#[] with an integer index: a Fixnum on Ruby 1.8 (as the
  # original comment said), a one-character String on Ruby 1.9 and later.
  def cur_char; @buffer[@buffer_index] end

  # Return the next n chars as a String (fewer if the buffer ends first).
  def cur_chars(n); @buffer[@buffer_index,n] end

  # Return the char after the current one (same type as #cur_char), or nil.
  def next_char; @buffer[@buffer_index+1] end

  # Return the current char and advance the cursor by one.
  def shift_char
    c = @buffer[@buffer_index]
    @buffer_index += 1
    c
  end

  # Advance the cursor by one char. Always returns nil.
  def ignore_char
    @buffer_index += 1
    nil
  end

  # Advance the cursor by n chars. Always returns nil.
  def ignore_chars(n)
    @buffer_index += n
    nil
  end

  # Return everything from the cursor to the end of the buffer.
  def current_remaining_buffer
    @buffer[@buffer_index, @buffer.size-@buffer_index]
  end

  # True if the text at the cursor starts with +string+.
  def cur_chars_are(string)
    cur_chars(string.size) == string
  end

  # True if the regexp +r+ matches starting exactly at the cursor.
  # The cursor is not moved.
  #
  # The pattern is anchored with \A rather than ^: ^ also matches right
  # after any newline, so on a multi-line buffer the old form could report
  # a match that actually started on a later line.
  def next_matches(r)
    r2 = /\A.{#{@buffer_index}}#{r}/m
    md = r2.match @buffer
    return !!md
  end

  # Match +r+ exactly at the cursor and, on success, advance past it.
  #
  # Returns the MatchData (or nil). Because the match is made against the
  # whole buffer, the MatchData's to_s / [0] also contains the
  # @buffer_index chars that precede the cursor; the capture groups are
  # those of +r+.
  def read_regexp3(r)
    # \A, not ^: see #next_matches.
    r2 = /\A.{#{@buffer_index}}#{r}/m
    m = r2.match @buffer
    if m
      # m.to_s spans from the start of the buffer, so subtract the prefix.
      consumed = m.to_s.size - @buffer_index
      ignore_chars consumed
    end
    m
  end

  # Match +r+ at the start of the remaining buffer and, on success,
  # advance the cursor by the length of the match.
  #
  # Returns the MatchData (or nil); here to_s is only the matched text.
  def read_regexp(r)
    # \A, not ^: with ^ the pattern could match at the start of a later
    # line of the remaining text, and the cursor would then be advanced
    # by the match length from the wrong place.
    r2 = /\A#{r}/
    rest = current_remaining_buffer
    m = r2.match(rest)
    if m
      @buffer_index += m.to_s.size
    end
    return m
  end

  # Skip over spaces and tabs (but not newlines) at the cursor.
  def consume_whitespace
    while (c = cur_char) && (c == ?\s || c == ?\t)
      ignore_char
    end
  end

  # Append the run of ASCII letters (a-z, A-Z) at the cursor to +out+
  # using <<, and advance past them.
  def read_text_chars(out)
    s = @buffer.size; c=nil
    while @buffer_index < s && (c=@buffer[@buffer_index]) &&
        ((c>=?a && c<=?z) || (c>=?A && c<=?Z))
      out << c
      @buffer_index += 1
    end
  end

  # Human-readable description of the cursor position (see describe_pos),
  # followed by the parent scanner's description when there is one.
  def describe
    s = describe_pos(@buffer, @buffer_index)
    if @parent
      s += "\n\n" + @parent.describe
    end
    s
  end
end
|
153
|
+
|
154
|
+
# Build a multi-line, human-readable picture of a position in a buffer,
# for use in error messages.
#
# buffer       - the String being scanned.
# buffer_index - the cursor position inside it.
#
# Returns a String made of: a ruler, an excerpt of at most 75 chars
# around the cursor (newlines shown as 'N', tabs as 'T', and "EOF"
# appended when the excerpt reaches the end of the buffer), a marker
# line pointing at the cursor, the index, the range shown, and finally
# the whole buffer passed through add_tabs(buffer, 1, ">").
def describe_pos(buffer, buffer_index)
  len = 75
  chars_before_cursor = buffer_index
  chars_after_cursor = buffer.size - buffer_index

  # Start from half the window after the cursor, give the remainder to
  # the text before it, then hand any slack back to the text after it.
  num_after = [len/2, chars_after_cursor].min
  num_before = [chars_before_cursor, len - num_after].min
  num_after = [chars_after_cursor, len - num_before].min

  index_start = [buffer_index - num_before, 0].max
  index_end = [buffer_index + num_after, buffer.size].min
  size = index_end - index_start

  # The slice is a new String, so the in-place substitutions below do not
  # touch the caller's buffer.
  str = buffer[index_start, size]
  str.gsub!("\n",'N')
  str.gsub!("\t",'T')
  str += "EOF" if index_end == buffer.size

  # Column of the cursor inside the excerpt.
  pre_s = [buffer_index - index_start, 0].max
  pre_s2 = [len - pre_s, 0].max
  pre = " " * pre_s

  "-"*len + "\n" +
    str + "\n" +
    "-"*pre_s + "|" + "-"*pre_s2 + "\n" +
    pre + "+--- Byte #{buffer_index}\n" +
    # Report the end index of the excerpt; the previous text printed its
    # length here, giving ranges such as "[112 to 75]".
    "Shown bytes [#{index_start} to #{index_end}] of #{buffer.size}:\n" +
    add_tabs(buffer,1,">")
end
|
200
|
+
|
201
|
+
|
202
|
+
require 'strscan'
|
203
|
+
|
204
|
+
# Character scanner with the same interface as CharSourceManual,
# implemented as a thin wrapper around the standard StringScanner.
class CharSourceStrscan
  include SpanLevelParser
  include MaRuKu::Strings

  # s      - the String to scan.
  # parent - optional enclosing scanner; only used by #describe.
  def initialize(s, parent=nil)
    @s = StringScanner.new(s)
    @parent = parent
  end

  # Return the char under the cursor, or nil at the end of input.
  # (A Fixnum on Ruby 1.8, a one-character String on 1.9 and later.)
  def cur_char
    @s.peek(1)[0]
  end

  # Return the next n chars as a String.
  def cur_chars(n)
    @s.peek(n)
  end

  # Return the char after the current one, or nil.
  def next_char
    @s.peek(2)[1]
  end

  # Return the current char and advance by one. Returns nil at the end of
  # input, like CharSourceManual#shift_char; previously this raised
  # NoMethodError because get_byte returns nil there.
  def shift_char
    byte = @s.get_byte
    byte && byte[0]
  end

  # Advance by one. Always returns nil.
  def ignore_char
    @s.get_byte
    nil
  end

  # Advance by n. Always returns nil.
  def ignore_chars(n)
    n.times { @s.get_byte }
    nil
  end

  # Return everything from the cursor to the end of the input.
  def current_remaining_buffer
    @s.rest
  end

  # True if the text at the cursor starts with +string+.
  def cur_chars_are(string)
    cur_chars(string.size) == string
  end

  # True if +r+ matches exactly at the cursor. The cursor is not moved.
  def next_matches(r)
    !!@s.match?(r)
  end

  # Match +r+ at the cursor and, on success, advance past it.
  # Returns a MatchData for the match, or nil.
  #
  # The MatchData is produced by matching against the text that followed
  # the cursor, not against the scanned substring alone: re-matching only
  # the substring loses the context a lookahead needs, and could return
  # nil even though the scanner had already advanced.
  def read_regexp(r)
    rest = @s.rest
    matched = @s.scan(r)
    return nil unless matched
    # Fall back to the old substring match for patterns that depend on
    # text before the cursor, which +rest+ does not contain.
    /\A#{r}/.match(rest) || r.match(matched)
  end

  # Skip over spaces and tabs at the cursor. Newlines are left alone, to
  # agree with CharSourceManual#consume_whitespace (this used to be \s+,
  # which also consumed newlines). Always returns nil.
  def consume_whitespace
    @s.scan(/[ \t]+/)
    nil
  end

  # Human-readable description of the cursor position (see describe_pos),
  # followed by the parent scanner's description when there is one, as
  # CharSourceManual#describe does.
  def describe
    s = describe_pos(@s.string, @s.pos)
    s += "\n\n" + @parent.describe if @parent
    s
  end
end
|
274
|
+
|
275
|
+
|
276
|
+
# Debugging scanner: forwards every call to both a CharSourceManual and a
# CharSourceStrscan built on the same input, and compares their results.
#
# On any disagreement it prints a report to standard output and
# terminates the process with +exit+; otherwise it returns the manual
# scanner's result.
class CharSourceDebug
  # s      - the String to scan.
  # parent - optional enclosing scanner, passed through to both scanners.
  #          It defaults to nil so this class can be constructed like the
  #          two scanners it stands in for.
  def initialize(s, parent=nil)
    @a = CharSourceManual.new(s, parent)
    @b = CharSourceStrscan.new(s, parent)
  end

  # Report the forwarded methods through respond_to?: a method is
  # available when both underlying scanners respond to it.
  def respond_to_missing?(methodname, include_private = false)
    (@a.respond_to?(methodname, include_private) &&
      @b.respond_to?(methodname, include_private)) || super
  end

  # Forward +methodname+ to both scanners and cross-check them.
  #
  # MatchData results are compared through to_a (the two scanners build
  # their MatchData differently); everything else is compared with ==.
  # After the call the two cursors must also sit on the same char.
  def method_missing(methodname, *args)
    a_bef = @a.describe
    b_bef = @b.describe

    a = @a.send(methodname, *args)
    b = @b.send(methodname, *args)

    if a.kind_of? MatchData
      if a.to_a != b.to_a
        report_and_exit([
          "called: #{methodname}(#{args})",
          "Matchdata:\na = #{a.to_a.inspect}\nb = #{b.to_a.inspect}",
          "AFTER: "+@a.describe,
          "AFTER: "+@b.describe,
          "BEFORE: "+a_bef,
          "BEFORE: "+b_bef,
          caller.join("\n")
        ])
      end
    elsif a != b
      report_and_exit([
        "called: #{methodname}(#{args})",
        "Attenzione!\na = #{a.inspect}\nb = #{b.inspect}",
        ""+@a.describe,
        ""+@b.describe,
        caller.join("\n")
      ])
    end

    if @a.cur_char != @b.cur_char
      report_and_exit([
        "Fuori sincronia dopo #{methodname}(#{args})",
        ""+@a.describe,
        ""+@b.describe
      ])
    end

    return a
  end

  private

  # Print each message with puts, then terminate the process.
  def report_and_exit(messages)
    messages.each { |message| puts message }
    exit
  end
end
|
325
|
+
|
326
|
+
end end end end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module MaRuKu; module In; module Markdown
|
2
|
+
|
3
|
+
|
4
|
+
# Hash: trigger char (as returned by src.cur_char) -> Array of SpanExtension
|
5
|
+
SpanExtensionsTrigger = {}
|
6
|
+
|
7
|
+
|
8
|
+
# Record describing one registered span-level extension.
#
#   chars  - the triggering chars
#   regexp - the triggering regexp
#   block  - the handler (a lambda)
class SpanExtension
  attr_accessor :chars, :regexp, :block
end
|
16
|
+
|
17
|
+
# Hash String -> Extension
|
18
|
+
SpanExtensions = {}
|
19
|
+
|
20
|
+
# Try the span extensions registered for the char under the cursor.
#
# src - the char source; must answer cur_char and next_matches(regexp).
# con - passed through unchanged to each handler.
#
# Extensions are tried in registration order. One is tried only if it
# has a regexp and that regexp matches at the cursor; its handler is then
# called with (doc, src, con). Returns true as soon as a handler returns
# a true value, false if none does.
def check_span_extensions(src, con)
  candidates = SpanExtensionsTrigger[src.cur_char]
  return false unless candidates

  candidates.each do |extension|
    next unless extension.regexp && src.next_matches(extension.regexp)
    return true if extension.block.call(doc, src, con)
  end
  false # not special
end
|
31
|
+
|
32
|
+
# Register a span-level extension.
#
# args - a Hash with:
#   :chars   - one trigger char or an Array of them
#   :regexp  - the regexp that must match at the cursor
#   :handler - the callable to invoke; required, otherwise a
#              RuntimeError "No blocks passed" is raised
#
# The new SpanExtension is appended to the SpanExtensionsTrigger list of
# each of its trigger chars. Returns the Array of trigger chars.
def self.register_span_extension(args)
  extension = SpanExtension.new
  extension.chars = [*args[:chars]]
  extension.regexp = args[:regexp]
  extension.block = args[:handler] || raise("No blocks passed")

  extension.chars.each do |trigger|
    SpanExtensionsTrigger[trigger] ||= []
    SpanExtensionsTrigger[trigger].push extension
  end
end
|
41
|
+
|
42
|
+
# Register a block-level extension.
#
# args - a Hash with:
#   :regexp  - the regexp tested against a line
#   :handler - the callable to invoke; required, otherwise a
#              RuntimeError "No blocks passed" is raised
#
# Stores the handler in BlockExtensions under the regexp (replacing any
# handler already stored for it) and returns the handler.
def self.register_block_extension(args)
  handler = args[:handler]
  raise("No blocks passed") unless handler
  BlockExtensions[args[:regexp]] = handler
end
|
46
|
+
|
47
|
+
# Hash Regexp -> Block
|
48
|
+
BlockExtensions = {}
|
49
|
+
|
50
|
+
# Offer +line+ to the registered block extensions.
#
# src, con - passed through unchanged to each handler.
# line     - the text each registered regexp is matched against.
#
# Every handler whose regexp matches +line+ is called with
# (doc, src, con), in the iteration order of BlockExtensions. Returns
# true as soon as a handler returns a true value, false if none does.
def check_block_extensions(src, con, line)
  BlockExtensions.each do |pattern, handler|
    next unless pattern.match(line)
    return true if handler.call(doc, src, con)
  end
  false # not special
end
|
60
|
+
|
61
|
+
# Test +line+ against the regexps of the registered block extensions.
#
# Returns the MatchData of the first regexp that matches, or false when
# none does. No handler is called.
def any_matching_block_extension?(line)
  hit = nil
  # find stops at the first regexp whose match is non-nil, leaving that
  # MatchData in hit; otherwise hit ends up nil.
  BlockExtensions.keys.find { |pattern| hit = pattern.match(line) }
  hit || false
end
|
68
|
+
|
69
|
+
end end end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (C) 2006 Andrea Censi <andrea (at) rubyforge.org>
|
3
|
+
#
|
4
|
+
# This file is part of Maruku.
|
5
|
+
#
|
6
|
+
# Maruku is free software; you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License as published by
|
8
|
+
# the Free Software Foundation; either version 2 of the License, or
|
9
|
+
# (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Maruku is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License
|
17
|
+
# along with Maruku; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
19
|
+
#++
|
20
|
+
|
21
|
+
|
22
|
+
module MaRuKu; module In; module Markdown; module SpanLevelParser
|
23
|
+
|
24
|
+
# This class helps read and sanitize HTML blocks.
#
# Text is fed in piece by piece through #eat_this. The helper keeps a
# stack of the tags that are currently open, accumulates what it has
# consumed so far (#stuff_you_read) separately from what is still pending
# (#rest), and #is_finished? tells when every opened tag has been closed.
# Tags listed in TO_SANITIZE are rewritten in self-closing form.
#
# (Original author's note: I tried to do this with REXML, but wasn't
# able to. Suggestions?)
class HTMLHelper
  include MaRuKu::Strings

  Tag = %r{^<(/)?(\w+)\s*([^>]*)>}m
  PartialTag = %r{^<.*}m

  EverythingElse = %r{^[^<]+}m
  CommentStart = %r{^<!--}x
  # Non-greedy, so that a comment ends at the first "-->". The greedy form
  # ran to the last "-->" on the line and swallowed any tags lying between
  # two comments on the same line.
  CommentEnd = %r{^.*?-->}
  TO_SANITIZE = ['img','hr','br']

  # Text that has been fed in but not consumed yet.
  attr_reader :rest

  # One of :inside_element, :inside_tag, :inside_comment.
  attr_accessor :state

  # Debugging hook; the body is intentionally commented out.
  def my_debug(s)
    # puts "---"*10+"\n"+inspect+"\t>>>\t"s
  end

  def initialize
    @rest = ""
    @tag_stack = []
    @m = nil
    @already = ""
    self.state = :inside_element
  end

  # Consume +line+.
  #
  # Runs the state machine until the pending text is exhausted, or until
  # the HTML is complete (#is_finished?) and at least one tag or comment
  # start was read during this call. In the second case the unconsumed
  # remainder stays available through #rest.
  #
  # Raises (via #error) on a closing tag that does not match the innermost
  # open tag, and on text that no rule can consume.
  def eat_this(line)
    # NOTE(review): the new line is placed *before* any pending text. This
    # is kept as in the original; confirm that callers never feed more
    # input while #rest is non-empty.
    @rest = line + @rest
    things_read = 0
    until @rest.empty?
      case self.state
      when :inside_comment
        if @m = CommentEnd.match(@rest)
          @already += @m.pre_match + @m.to_s
          @rest = @m.post_match
          self.state = :inside_element
        else
          # No end of comment yet: consume everything, stay in the comment.
          @already += @rest
          @rest = ""
        end
      when :inside_element
        if @m = CommentStart.match(@rest)
          things_read += 1
          @already += @m.pre_match + @m.to_s
          @rest = @m.post_match
          self.state = :inside_comment
        elsif @m = Tag.match(@rest)
          my_debug "#{@state}: Tag: #{@m.to_s.inspect}"
          things_read += 1
          handle_tag
        elsif @m = PartialTag.match(@rest)
          # A tag was opened but its '>' has not arrived yet: set it
          # aside and wait for more input in the :inside_tag state.
          my_debug "#{@state}: PartialTag: #{@m.to_s.inspect}"
          @already += @m.pre_match
          @rest = @m.post_match
          @partial_tag = @m.to_s
          self.state = :inside_tag
        elsif @m = EverythingElse.match(@rest)
          my_debug "#{@state}: Everything: #{@m.to_s.inspect}"
          @already += @m.pre_match + @m.to_s
          @rest = @m.post_match
        else
          error "Malformed HTML: not complete: #{@rest.inspect}"
        end
      when :inside_tag
        if @m = /^[^>]*>/.match(@rest)
          my_debug "#{@state}: inside_tag: matched #{@m.to_s.inspect}"
          @partial_tag += @m.to_s
          my_debug "#{@state}: inside_tag: matched TOTAL: #{@partial_tag.to_s.inspect}"
          # The tag is complete now: push it back onto the pending text so
          # that the :inside_element rules parse it as a whole.
          @rest = @partial_tag + @m.post_match
          @partial_tag = nil
          self.state = :inside_element
        else
          @partial_tag += @rest
          @rest = ""
        end
      else
        raise "Bug bug: state = #{self.state.inspect}"
      end

      break if is_finished? and things_read>0
    end
  end

  # Process the tag just matched by Tag (held in @m): copy it to the
  # consumed text and update the stack of open tags.
  #
  # - Tags in TO_SANITIZE are emitted as "<tag attrs />" and never touch
  #   the stack.
  # - A closing tag must match the innermost open tag, which is popped;
  #   otherwise #error raises.
  # - Any other tag is pushed, unless it is written as self-closing
  #   (its attribute text ends with '/').
  def handle_tag()
    @already += @m.pre_match
    @rest = @m.post_match

    is_closing = !!@m[1]
    tag = @m[2]
    attributes = @m[3].to_s

    is_single = false
    if attributes[-1] == ?/
      attributes = attributes[0, attributes.size-1]
      is_single = true
    end

    my_debug "Attributes: #{attributes.inspect}"
    my_debug "READ TAG #{@m.to_s.inspect} tag = #{tag} closing? #{is_closing} single = #{is_single}"

    if TO_SANITIZE.include? tag
      attributes.strip!
      if attributes.size > 0
        @already += '<%s %s />' % [tag, attributes]
      else
        @already += '<%s />' % [tag]
      end
    elsif is_closing
      @already += @m.to_s
      if @tag_stack.empty?
        error "Malformed: closing tag #{tag.inspect} "+
          "in empty list"
      end
      if @tag_stack.last != tag
        error "Malformed: tag <#{tag}> "+
          "closes <#{@tag_stack.last}>"
      end
      @tag_stack.pop
    else
      @already += @m.to_s

      if not is_single
        @tag_stack.push(tag)
        my_debug "Pushing #{tag.inspect} when read #{@m.to_s.inspect}"
      end
    end
  end

  # Raise an error carrying +s+ and a dump of the reader state, with the
  # backtrace pointing at the caller.
  #
  # NOTE(review): this raises Exception itself, not a StandardError, so a
  # bare `rescue` will not catch it. Left unchanged because callers may
  # rely on the class.
  def error(s)
    raise Exception, "Error: #{s} \n"+ inspect, caller
  end

  # Multi-line dump of the reader state, used in error messages.
  def inspect; "HTML READER\n state=#{self.state} "+
    "match=#{@m.to_s.inspect}\n"+
    "Tag stack = #{@tag_stack.inspect} \n"+
    "Before:\n"+
    add_tabs(@already,1,'|')+"\n"+
    "After:\n"+
    add_tabs(@rest,1,'|')+"\n"

  end

  # Everything consumed so far, with sanitized tags rewritten.
  def stuff_you_read
    @already
  end

  # True when the reader is between tags and no tag is left open.
  def is_finished?
    (self.state == :inside_element) and @tag_stack.empty?
  end
end # html helper
|
188
|
+
|
189
|
+
end end end end
|