wikitext 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/ary.h +99 -0
- data/ext/depend +22 -0
- data/ext/extconf.rb +23 -0
- data/ext/parser.c +2174 -0
- data/ext/parser.h +31 -0
- data/ext/str.h +135 -0
- data/ext/token.c +109 -0
- data/ext/token.h +95 -0
- data/ext/wikitext.c +60 -0
- data/ext/wikitext.h +30 -0
- data/ext/wikitext_ragel.c +3354 -0
- data/ext/wikitext_ragel.h +17 -0
- data/spec/autolinking_spec.rb +122 -0
- data/spec/blockquote_spec.rb +570 -0
- data/spec/em_spec.rb +97 -0
- data/spec/encoding_spec.rb +124 -0
- data/spec/entity_spec.rb +40 -0
- data/spec/external_link_spec.rb +289 -0
- data/spec/h1_spec.rb +59 -0
- data/spec/h2_spec.rb +59 -0
- data/spec/h3_spec.rb +59 -0
- data/spec/h4_spec.rb +59 -0
- data/spec/h5_spec.rb +59 -0
- data/spec/h6_spec.rb +59 -0
- data/spec/indentation_spec.rb +70 -0
- data/spec/integration_spec.rb +265 -0
- data/spec/internal_link_spec.rb +445 -0
- data/spec/line_endings_spec.rb +81 -0
- data/spec/link_encoding_spec.rb +132 -0
- data/spec/link_sanitizing_spec.rb +228 -0
- data/spec/nowiki_spec.rb +155 -0
- data/spec/p_spec.rb +44 -0
- data/spec/pre_spec.rb +411 -0
- data/spec/regressions_spec.rb +45 -0
- data/spec/spec_helper.rb +77 -0
- data/spec/strong_em_spec.rb +89 -0
- data/spec/strong_spec.rb +99 -0
- data/spec/tokenizing_spec.rb +190 -0
- data/spec/tt_spec.rb +100 -0
- data/spec/ul_spec.rb +307 -0
- data/spec/wikitext_spec.rb +50 -0
- metadata +93 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright 2008 Wincent Colaiuta
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require File.join(File.dirname(__FILE__), 'spec_helper.rb')
|
17
|
+
require 'wikitext'
|
18
|
+
|
19
|
+
# this is a general-purpose file in which I'll add specs for former bugs to make sure that they don't regress
|
20
|
+
# Regression examples for Wikitext::Parser: each example feeds wikitext markup
# to Wikitext::Parser#parse and compares the returned string with the expected markup.
describe Wikitext::Parser, 'regressions' do
|
21
|
+
# build a new parser before every example
before do
|
22
|
+
@parser = Wikitext::Parser.new
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'should correctly transform example #1' do
|
26
|
+
# turns out that this was never a bug in wikitext: it was a bug in the host application
|
27
|
+
# NOTE(review): both heredocs below are passed through dedent with its default of 6
|
# spaces, which raises on any line with a shorter indent; the lines appear here with
# no leading whitespace, so indentation (and any relative nesting) was presumably
# lost when this page was captured -- confirm against the repository.
input = dedent <<-END
|
28
|
+
= Leopard =
|
29
|
+
|
30
|
+
* punto 1
|
31
|
+
* punto 2
|
32
|
+
|
33
|
+
Y [[otro articulo]].
|
34
|
+
END
|
35
|
+
expected = dedent <<-END
|
36
|
+
<h1>Leopard</h1>
|
37
|
+
<ul>
|
38
|
+
<li>punto 1</li>
|
39
|
+
<li>punto 2</li>
|
40
|
+
</ul>
|
41
|
+
<p>Y <a href="/wiki/otro%20articulo">otro articulo</a>.</p>
|
42
|
+
END
|
43
|
+
@parser.parse(input).should == expected
|
44
|
+
end
|
45
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
# Copyright 2007-2008 Wincent Colaiuta
|
2
|
+
# This program is free software: you can redistribute it and/or modify
|
3
|
+
# it under the terms of the GNU General Public License as published by
|
4
|
+
# the Free Software Foundation, either version 3 of the License, or
|
5
|
+
# (at your option) any later version.
|
6
|
+
#
|
7
|
+
# This program is distributed in the hope that it will be useful,
|
8
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
9
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
10
|
+
# GNU General Public License for more details.
|
11
|
+
#
|
12
|
+
# You should have received a copy of the GNU General Public License
|
13
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
14
|
+
|
15
|
+
require 'pathname'
|
16
|
+
require 'rubygems'
|
17
|
+
require 'spec'
|
18
|
+
|
19
|
+
# allow indenting of multiline spec data for better readability
|
20
|
+
# but must dedent it before actually doing the comparison
|
21
|
+
# Removes a fixed number of leading spaces from every line of a String, so
# that multiline spec data can be indented in the spec files.
#
# Call forms:
#   dedent(string)          # strip 6 leading spaces from each line
#   dedent(spaces, string)  # strip +spaces+ leading spaces from each line
#
# Returns a new String; the argument is not modified.
#
# Raises RuntimeError when the arguments match neither call form, or when any
# line (including a blank one) starts with fewer than the requested number of
# spaces.
def dedent(spaces, string = nil)
  if spaces.kind_of?(String)
    raise 'When first argument is a String, second argument must be nil' unless string.nil?
    # default use: single String parameter, dedent by 6
    string, spaces = spaces, 6
  elsif spaces.kind_of?(Integer)
    # nil is not a String either, so one kind_of? test covers both cases
    raise 'When first argument is a number, second must be a String' unless string.kind_of?(String)
  else
    raise 'Invalid argument'
  end

  # String#each no longer exists as of Ruby 1.9; each_line is available on
  # old and new interpreters alike.
  string.each_line do |line|
    raise "Underlength indent for line: #{line.inspect}" unless line =~ /\A {#{spaces}}/
  end

  # ^ anchors at the start of every line, not just the start of the string
  string.gsub(/^ {#{spaces}}/, '')
end
|
44
|
+
|
45
|
+
# Makes the sibling "ext" directory loadable: records its resolved location in
# Wikitext::EXTDIR (once) and adds it to the load path when no existing entry
# resolves to the same directory.
module Wikitext
  unless const_defined? 'EXTDIR'
    EXTDIR = Pathname.new(File.join(File.dirname(__FILE__), '..', 'ext')).realpath

    # resolve each load path entry; entries that cannot be resolved are kept as-is
    resolved = $:.map do |entry|
      begin
        Pathname.new(entry).realpath
      rescue
        entry
      end
    end

    $: << EXTDIR unless resolved.include?(EXTDIR)
  end
end # module Wikitext
|
54
|
+
|
55
|
+
# Byte strings that are deliberately NOT valid UTF-8, for use as spec input.
# Each constant is built with Array#pack('C*'), i.e. one unsigned byte per
# array element.
module UTF8
  unless const_defined? 'Invalid'
    module Invalid
      TWO_BYTES_MISSING_SECOND_BYTE     = [0b11011111].pack('C*')
      TWO_BYTES_MALFORMED_SECOND_BYTE   = [0b11011111, 0b00001111].pack('C*') # should be 10......
      OVERLONG                          = [0b11000000, 0b10000000].pack('C*') # lead byte is 110..... but code point is <= 127
      OVERLONG_ALT                      = [0b11000001, 0b10000000].pack('C*') # lead byte is 110..... but code point is <= 127
      THREE_BYTES_MISSING_SECOND_BYTE   = [0b11100000].pack('C*')
      THREE_BYTES_MISSING_THIRD_BYTE    = [0b11100000, 0b10000000].pack('C*')
      THREE_BYTES_MALFORMED_SECOND_BYTE = [0b11100000, 0b00001111, 0b10000000].pack('C*') # should be 10......
      THREE_BYTES_MALFORMED_THIRD_BYTE  = [0b11100000, 0b10000000, 0b00001111].pack('C*') # should be 10......
      FOUR_BYTES_MISSING_SECOND_BYTE    = [0b11110000].pack('C*')

      # The continuation bytes below were previously written with a 0x (hex)
      # prefix; pack('C*') keeps only the low byte of such a value, which
      # produced 0x11 instead of the intended 10111111 continuation byte.
      FOUR_BYTES_MISSING_THIRD_BYTE     = [0b11110000, 0b10111111].pack('C*')
      FOUR_BYTES_MISSING_FOURTH_BYTE    = [0b11110000, 0b10111111, 0b10111111].pack('C*')

      # Lead bytes 11110101, 11110110 and 11110111 announce a four-byte
      # sequence but can only encode values beyond U+10FFFF. The three
      # constants used to be byte-for-byte identical.
      # NOTE(review): the distinct ALT/ALT2 lead bytes are inferred from the
      # constant names -- confirm this matches the cases the encoding specs
      # are meant to cover.
      FOUR_BYTES_ILLEGAL_FIRST_BYTE      = [0b11110101, 0b10111111, 0b10111111, 0b10111111].pack('C*')
      FOUR_BYTES_ILLEGAL_FIRST_BYTE_ALT  = [0b11110110, 0b10111111, 0b10111111, 0b10111111].pack('C*')
      FOUR_BYTES_ILLEGAL_FIRST_BYTE_ALT2 = [0b11110111, 0b10111111, 0b10111111, 0b10111111].pack('C*')

      UNEXPECTED_BYTE                   = [0b11111000].pack('C*')
    end # module Invalid
  end
end # module UTF8
|
76
|
+
|
77
|
+
require 'wikitext'
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright 2007-2008 Wincent Colaiuta
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require File.join(File.dirname(__FILE__), 'spec_helper.rb')
|
17
|
+
require 'wikitext'
|
18
|
+
|
19
|
+
# Examples for the five-apostrophe token, which opens (or closes) a <strong>
# and an <em> span at once, and for how it combines with the separate ''' and
# '' tokens and with literal HTML tags.
describe Wikitext::Parser, 'parsing combined <strong>/<em> spans' do
  before do
    @parser = Wikitext::Parser.new
  end

  it 'should recognize paired "<strong><em>" tokens' do
    expected = "<p>foo <strong><em>bar</em></strong> baz</p>\n"
    @parser.parse("foo '''''bar''''' baz").should == expected
  end

  it 'should automatically insert missing closing tags' do
    expected = "<p>foo <strong><em>bar</em></strong></p>\n"
    [
      "foo '''''bar",     # totally missing
      "foo '''''bar''",   # only <strong> missing
      "foo '''''bar'''"   # only <em> missing
    ].each do |input|
      @parser.parse(input).should == expected
    end
  end

  it 'should automatically close unclosed spans upon hitting newlines' do
    expected = "<p>foo <strong><em>bar</em></strong> baz</p>\n"
    [
      "foo '''''bar\nbaz",     # totally missing
      "foo '''''bar''\nbaz",   # only <strong> missing
      "foo '''''bar'''\nbaz"   # only <em> missing
    ].each do |input|
      @parser.parse(input).should == expected
    end
  end

  it 'should allow combined "<strong><em>" tokens to interact with separate <strong> and <em> tokens' do
    [
      ["foo '''bar ''baz'''''", "<p>foo <strong>bar <em>baz</em></strong></p>\n"],
      ["foo ''bar '''baz'''''", "<p>foo <em>bar <strong>baz</strong></em></p>\n"],
      ["'''''foo'' bar''' baz", "<p><strong><em>foo</em> bar</strong> baz</p>\n"]
    ].each do |input, expected|
      @parser.parse(input).should == expected
    end
  end

  it 'should handle (illegal) interleaved spans' do
    # the five-apostrophe token opens <strong> then <em>, so a following ''' asks to close
    # the outer <strong> while the <em> is still open: an illegal nesting
    expected = "<p><strong><em>foo</em></strong> bar<em> baz</em></p>\n"
    @parser.parse("'''''foo''' bar'' baz").should == expected

    # to get "<em><strong>" instead, separate the tokens with whitespace
    # (further variants are in the "disambiguation" examples below)
    expected = "<p><em> <strong>foo</strong> bar</em> baz</p>\n"
    @parser.parse("'' '''foo''' bar'' baz").should == expected
  end

  it 'should have no effect inside <pre> blocks' do
    expected = "<pre>'''''foo'''''</pre>\n"
    @parser.parse(" '''''foo'''''").should == expected
  end

  it 'should have no effect inside <nowiki> spans' do
    expected = "<p>'''''foo'''''</p>\n"
    @parser.parse("<nowiki>'''''foo'''''</nowiki>").should == expected
  end

  describe 'disambiguation' do
    it 'should by default assume strong followed by em' do
      expected = "<p><strong><em>foo</em></strong></p>\n"
      @parser.parse("'''''foo'''''").should == expected
    end

    it 'should accept an empty nowiki span as a means of imposing em followed by strong' do
      expected = "<p><em><strong>foo</strong></em></p>\n"
      @parser.parse("''<nowiki></nowiki>'''foo'''''").should == expected
    end

    it 'should accept whitespace as a means of imposing em followed by strong' do
      # the extra whitespace makes no visual difference once rendered in a browser
      expected = "<p><em> <strong>foo</strong></em></p>\n"
      @parser.parse("'' '''foo'''''").should == expected
    end

    it 'should accept a literal <em> tag as a means of imposing em followed by strong' do
      expected = "<p><em><strong>foo</strong></em></p>\n"
      @parser.parse("<em>'''foo'''</em>").should == expected
    end

    it 'should accept a literal <strong> tag as a means of imposing em followed by strong' do
      expected = "<p><em><strong>foo</strong></em></p>\n"
      @parser.parse("''<strong>foo</strong>''").should == expected
    end

    it 'should accept literal <em> and <strong> tags as a means of imposing em followed by strong' do
      expected = "<p><em><strong>foo</strong></em></p>\n"
      @parser.parse("<em><strong>foo</strong></em>").should == expected
    end
  end
end
|
data/spec/strong_spec.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright 2007-2008 Wincent Colaiuta
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require File.join(File.dirname(__FILE__), 'spec_helper.rb')
|
17
|
+
require 'wikitext'
|
18
|
+
|
19
|
+
# Examples for <strong> spans, written either with the ''' wikitext shorthand
# or with literal <strong> HTML tags.
#
# NOTE(review): several expectations below show literal tags or an unbalanced
# closing tag where escaped output might be expected (inside <pre>/<nowiki>,
# or for markup that has "no effect"). The text this file was recovered from
# may have had HTML entities decoded -- confirm the expected strings against
# the repository before relying on them.
describe Wikitext::Parser, 'parsing <strong> spans' do
  before do
    @parser = Wikitext::Parser.new
  end

  describe 'marked up using wikitext shorthand' do
    it 'should recognize paired <strong> tokens' do
      @parser.parse("foo '''bar''' baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
    end

    it 'should automatically insert missing closing tags' do
      @parser.parse("foo '''bar").should == "<p>foo <strong>bar</strong></p>\n"
    end

    it 'should automatically close unclosed spans upon hitting newlines' do
      @parser.parse("foo '''bar\nbaz").should == "<p>foo <strong>bar</strong> baz</p>\n"
    end

    it 'should handle (illegal) interleaved spans' do
      @parser.parse("foo '''bar ''inner''' baz''").should == "<p>foo <strong>bar <em>inner</em></strong> baz<em></em></p>\n"
    end

    it 'should have no effect inside <pre> blocks' do
      @parser.parse(" '''foo'''").should == "<pre>'''foo'''</pre>\n"
    end

    it 'should have no effect inside <nowiki> spans' do
      @parser.parse("<nowiki>'''foo'''</nowiki>").should == "<p>'''foo'''</p>\n"
    end

    it "should have no effect if a strong (<strong>) span is already open" do
      @parser.parse("foo <strong>'''bar'''</strong> baz").should == "<p>foo <strong>'''bar'''</strong> baz</p>\n"
    end
  end

  describe 'marked up using HTML tags' do
    # description previously read "should recognized paired"
    it 'should recognize paired <strong> tokens' do
      @parser.parse("foo <strong>bar</strong> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
    end

    it 'should recognize <strong> tokens case-insensitively' do
      @parser.parse("foo <STRong>bar</STRONG> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
      @parser.parse("foo <strONG>bar</STRong> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
      @parser.parse("foo <STRONG>bar</strONG> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
    end

    it 'should automatically insert missing closing tags' do
      @parser.parse("foo <strong>bar").should == "<p>foo <strong>bar</strong></p>\n"
    end

    it 'should automatically close unclosed spans upon hitting newlines' do
      @parser.parse("foo <strong>bar\nbaz").should == "<p>foo <strong>bar</strong> baz</p>\n"
    end

    it 'should handle (illegal) interleaved spans' do
      expected = "<p>foo <strong>bar <em>inner</em></strong> baz</em></p>\n"
      @parser.parse("foo <strong>bar <em>inner</strong> baz</em>").should == expected

      expected = "<p>foo <strong>bar <em>inner</em></strong> baz<em></em></p>\n"
      @parser.parse("foo <strong>bar ''inner</strong> baz''").should == expected
    end

    it 'should handle (illegal) nested <strong> spans' do
      expected = "<p>foo <strong>bar <strong>inner</strong></strong> baz</p>\n"
      @parser.parse('foo <strong>bar <strong>inner</strong></strong> baz').should == expected
    end

    it 'should have no effect inside <pre> blocks' do
      @parser.parse(" <strong>foo</strong>").should == "<pre><strong>foo</strong></pre>\n"
    end

    it 'should have no effect inside <nowiki> spans' do
      @parser.parse("<nowiki><strong>foo</strong></nowiki>").should == "<p><strong>foo</strong></p>\n"
    end

    # description previously read "an strong"
    it "should have no effect if a strong (''') span is already open" do
      expected = "<p>foo <strong><strong>bar</strong></strong> baz</p>\n"
      @parser.parse("foo '''<strong>bar</strong>''' baz").should == expected
    end
  end
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# Copyright 2008 Wincent Colaiuta
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 3 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
|
16
|
+
require File.join(File.dirname(__FILE__), 'spec_helper.rb')
|
17
|
+
require 'wikitext'
|
18
|
+
|
19
|
+
# Examples for the table returned by Wikitext::Parser::Token.types: a Hash
# whose keys are Integers and whose values are Symbols, with no duplicates on
# either side.
describe Wikitext::Parser::Token do
  before do
    @tokens = Wikitext::Parser::Token.types
  end

  it 'should report the available token types as a hash' do
    @tokens.should be_kind_of(Hash)
  end

  it 'should report token names as symbols and values as numbers' do
    # each entry maps a number (key) to a name (value)
    @tokens.each do |number, name|
      name.should be_kind_of(Symbol)
      number.should be_kind_of(Integer)
    end
  end

  it 'should report unique token names and values' do
    [@tokens.keys, @tokens.values].each do |list|
      list.uniq.length.should == list.length
    end
  end
end
|
42
|
+
|
43
|
+
# Examples for Wikitext::Parser#tokenize: the token list it returns, and the
# type, string value and line/column positions reported by each token.
describe Wikitext::Parser, 'tokenizing' do
  before do
    @parser = Wikitext::Parser.new
  end

  it 'should do nothing if passed nil' do
    @parser.tokenize(nil).should == nil
  end

  it "should complain if passed an object that doesn't quack like a string" do
    lambda { @parser.tokenize({}) }.should raise_error
  end

  it 'should tokenize strings containing a single symbol' do
    @tokens = @parser.tokenize('foo')
    @tokens.length.should == 2
    @tokens[0].token_type.should == :printable
    @tokens[0].string_value.should == 'foo'
    @tokens[1].token_type.should == :end_of_file
    @tokens[1].string_value.should == ''
  end

  it 'should tokenize strings containing multiple symbols' do
    @tokens = @parser.tokenize('foo http://example.com/')
    @tokens.length.should == 4
    @tokens[0].token_type.should == :printable
    @tokens[0].string_value.should == 'foo'
    @tokens[1].token_type.should == :space
    @tokens[1].string_value.should == ' '
    @tokens[2].token_type.should == :uri
    @tokens[2].string_value.should == 'http://example.com/'
    @tokens[3].token_type.should == :end_of_file
    @tokens[3].string_value.should == ''
  end

  # description previously read "as as single symbol"
  it 'should tokenize runs of printable characters as a single symbol' do
    @tokens = @parser.tokenize('foo')
    @tokens.length.should == 2
    @tokens[0].token_type.should == :printable
    @tokens[0].string_value.should == 'foo'
    @tokens[0].line_start.should == 1
    @tokens[0].column_start.should == 1
    @tokens[0].line_stop.should == 1
    @tokens[0].column_stop.should == 4
    @tokens[1].token_type.should == :end_of_file
    @tokens[1].string_value.should == ''
  end

  it 'should tokenize END_OF_FILE tokens as zero-width tokens' do
    @tokens = @parser.tokenize('')
    @tokens.length.should == 1
    @tokens[0].token_type.should == :end_of_file
    @tokens[0].line_start.should == 1
    @tokens[0].column_start.should == 1
    @tokens[0].line_stop.should == 1
    @tokens[0].column_stop.should == 1
    @tokens[0].string_value.should == ''
  end

  it 'should be able to tokenize strings containing "}"' do
    # was a bug: we were throwing an exception "failed before finding a token" because our PRINTABLE rule omitted this code point
    lambda { @tokens = @parser.tokenize('}') }.should_not raise_error
    @tokens.length.should == 2
    @tokens[0].token_type.should == :printable
    @tokens[0].string_value.should == '}'
    @tokens[0].line_start.should == 1
    @tokens[0].column_start.should == 1
    @tokens[0].line_stop.should == 1
    @tokens[0].column_stop.should == 2
    @tokens[1].token_type.should == :end_of_file
    @tokens[1].string_value.should == ''
  end

  it 'should be able to tokenize the full range of printable ASCII' do
    # see the previous example: we just want to make sure that our PRINTABLE rule is adequate
    printable_ascii = (0x20..0x7e).to_a.pack('C*')
    lambda { @parser.tokenize(printable_ascii) }.should_not raise_error
  end

  it 'should be able to tokenize large blocks of text' do
    # Every heredoc line -- blank ones included -- carries six leading spaces,
    # because dedent (default 6) raises on any line with a shorter indent.
    # NOTE(review): indentation beyond those six spaces (for example a leading
    # space that would mark the "code block" lines) is not recoverable from the
    # text this was rebuilt from; this example only checks that tokens come back.
    # The "explicit entities" line uses &copy; on the assumption that the
    # entity had been decoded to a literal character -- confirm.
    large_block_of_text = dedent <<-END
      paragraph
      second line
      
      new paragraph
      
      = a heading =
      
      > a blockquote
      > second line of blockquote
      >
      > new paragraph within blockquote
      
      == another heading ==
      
      paragraph within ''multiple '''styles''''' and <tt>tt span</tt>
      
      similar, but with '''styles in ''different'' order'''
      
      again, a '''different ''order'''''
      
      * list item 1
      ** nested list item 1
      ** nested list item 2
      ** nested list item 3
      * list item 2
      
      // this is a code block
      notice how it can contain ''markup''
      which would '''otherwise''' have <tt>special</tt> meaning
      although explicit entities &copy; are passed through unchanged
      
      a normal paragraph again
      
      This is where we show a link to an article on [[GCC]].
      Related to that, [[GCC|a link]] to the same
      article but with custom link text.
      
      External links [http://example.com work too].
      As well as autolinks as seen http://example.com/
      here.
      
      Look at how we handle bad syntax. [[This is an unterminated
      link. And [http://example.com/ is another.
      
      # this is an ordered list
      # which continues
      ## and has another ordered list
      ## nested inside it
      # and then falls back
      #* and then nests another list
      #* this time an unordered one
      #** itself containing a nested list
      #** which continues
      #**# and finally nests yet another ordered list
      #**# which continues
      #* drops back quite a way
      # and finally all the way
      #****** and finishes with an invalid item
      
      === heading with missing closing tag
      * list
      # new list
    END
    @tokens = @parser.tokenize(large_block_of_text)
    @tokens.length.should > 0
  end
end
|