wikitext 0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2008 Wincent Colaiuta
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
17
+ require 'wikitext'
18
+
19
+ # this is a general-purpose file in which I'll add specs for former bugs to make sure that they don't regress
20
+ describe Wikitext::Parser, 'regressions' do # examples reproducing previously reported problems so that they cannot silently return
21
+ before do # assigns a new Wikitext::Parser to @parser for the examples below
22
+ @parser = Wikitext::Parser.new
23
+ end
24
+
25
+ it 'should correctly transform example #1' do
26
+ # turned out not to be a wikitext bug after all: the fault was in the host application
27
+ input = dedent <<-END
28
+ = Leopard =
29
+
30
+ * punto 1
31
+ * punto 2
32
+
33
+ Y [[otro articulo]].
34
+ END
35
+ expected = dedent <<-END
36
+ <h1>Leopard</h1>
37
+ <ul>
38
+ <li>punto 1</li>
39
+ <li>punto 2</li>
40
+ </ul>
41
+ <p>Y <a href="/wiki/otro%20articulo">otro articulo</a>.</p>
42
+ END
43
+ @parser.parse(input).should == expected
44
+ end
45
+ end
@@ -0,0 +1,77 @@
1
+ # Copyright 2007-2008 Wincent Colaiuta
2
+ # This program is free software: you can redistribute it and/or modify
3
+ # it under the terms of the GNU General Public License as published by
4
+ # the Free Software Foundation, either version 3 of the License, or
5
+ # (at your option) any later version.
6
+ #
7
+ # This program is distributed in the hope that it will be useful,
8
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
+ # GNU General Public License for more details.
11
+ #
12
+ # You should have received a copy of the GNU General Public License
13
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
14
+
15
+ require 'pathname'
16
+ require 'rubygems'
17
+ require 'spec'
18
+
19
# Allows multiline spec data to be indented for readability: strips a fixed
# number of leading spaces from every line so the data can then be compared.
#
# Call forms:
#   dedent(string)          - strip the default 6 leading spaces
#   dedent(spaces, string)  - strip exactly +spaces+ leading spaces
#
# Returns a new String; the argument itself is not modified.
#
# Raises RuntimeError when the arguments have the wrong types, or when any
# line (blank lines included) starts with fewer than the requested number of
# spaces.
def dedent(spaces, string = nil)
  if spaces.kind_of?(String)
    unless string.nil?
      raise 'When first argument is a String, second argument must be nil'
    end
    # default use: single String parameter, dedent by 6
    string = spaces
    spaces = 6
  elsif spaces.kind_of?(Integer)
    unless string.kind_of?(String)
      raise 'When first argument is a number, second must be a String'
    end
  else
    raise 'Invalid argument'
  end

  width = spaces.to_i

  # String#each no longer exists (removed in Ruby 1.9); each_line is the
  # equivalent that works on both old and current interpreters.
  string.each_line do |line|
    unless line =~ /\A {#{width}}/
      raise "Underlength indent for line: #{line.inspect}"
    end
  end

  # explicit parentheses avoid the "ambiguous first argument" regexp warning
  string.gsub(/^ {#{width}}/, '')
end
44
+
45
module Wikitext
  # EXTDIR doubles as a "has this already run?" marker, so the load path is
  # only ever extended once even if this file is loaded repeatedly.
  unless const_defined? 'EXTDIR'
    # the "ext" directory sits next to the directory containing this file
    parent_dir = File.join(File.dirname(__FILE__), '..')
    EXTDIR = Pathname.new(File.join(parent_dir, 'ext')).realpath

    # compare resolved paths where possible; load path entries that cannot be
    # resolved are kept exactly as they are
    resolved_load_path = $:.map do |entry|
      begin
        Pathname.new(entry).realpath
      rescue StandardError
        entry
      end
    end

    # append the local "ext" directory to the search path if not already present
    $: << EXTDIR unless resolved_load_path.include?(EXTDIR)
  end
end # module Wikitext
54
+
55
# Byte strings that are deliberately NOT valid UTF-8, for feeding to the
# parser in specs. Each constant is built with Array#pack('C*'), i.e. one
# unsigned byte per listed integer.
module UTF8
  unless const_defined?('Invalid')
    module Invalid
      TWO_BYTES_MISSING_SECOND_BYTE = [0b11011111].pack('C*')
      TWO_BYTES_MALFORMED_SECOND_BYTE = [0b11011111, 0b00001111].pack('C*') # should be 10......
      OVERLONG = [0b11000000, 0b10000000].pack('C*') # lead byte is 110..... but code point is <= 127
      OVERLONG_ALT = [0b11000001, 0b10000000].pack('C*') # lead byte is 110..... but code point is <= 127
      THREE_BYTES_MISSING_SECOND_BYTE = [0b11100000].pack('C*')
      THREE_BYTES_MISSING_THIRD_BYTE = [0b11100000, 0b10000000].pack('C*')
      THREE_BYTES_MALFORMED_SECOND_BYTE = [0b11100000, 0b00001111, 0b10000000].pack('C*') # should be 10......
      THREE_BYTES_MALFORMED_THIRD_BYTE = [0b11100000, 0b10000000, 0b00001111].pack('C*') # should be 10......
      FOUR_BYTES_MISSING_SECOND_BYTE = [0b11110000].pack('C*')

      # The trailing bytes below must be written in binary (0b10111111 == 0xBF,
      # a well-formed continuation byte). Written as hex (0x10111111) the value
      # does not fit in a byte and pack('C*') keeps only its low 8 bits (0x11),
      # which is not a continuation byte at all.
      FOUR_BYTES_MISSING_THIRD_BYTE = [0b11110000, 0b10111111].pack('C*')
      FOUR_BYTES_MISSING_FOURTH_BYTE = [0b11110000, 0b10111111, 0b10111111].pack('C*')

      # 0xF5, 0xF6 and 0xF7 all carry the 11110... four-byte lead pattern but
      # are never legal in UTF-8 (they would encode beyond U+10FFFF); each
      # variant uses a different one so the three fixtures are distinct.
      FOUR_BYTES_ILLEGAL_FIRST_BYTE = [0b11110101, 0b10111111, 0b10111111, 0b10111111].pack('C*')
      FOUR_BYTES_ILLEGAL_FIRST_BYTE_ALT = [0b11110110, 0b10111111, 0b10111111, 0b10111111].pack('C*')
      FOUR_BYTES_ILLEGAL_FIRST_BYTE_ALT2 = [0b11110111, 0b10111111, 0b10111111, 0b10111111].pack('C*')

      UNEXPECTED_BYTE = [0b11111000].pack('C*')
    end # module Invalid
  end
end # module UTF8
76
+
77
+ require 'wikitext'
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2007-2008 Wincent Colaiuta
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
17
+ require 'wikitext'
18
+
19
+ describe Wikitext::Parser, 'parsing combined <strong>/<em> spans' do # specs for the five-apostrophe shorthand that opens <strong> and <em> together
20
+ before do # assigns a new Wikitext::Parser to @parser for the examples below
21
+ @parser = Wikitext::Parser.new
22
+ end
23
+
24
+ it 'should recognize paired "<strong><em>" tokens' do
25
+ @parser.parse("foo '''''bar''''' baz").should == "<p>foo <strong><em>bar</em></strong> baz</p>\n"
26
+ end
27
+
28
+ it 'should automatically insert missing closing tags' do
29
+ @parser.parse("foo '''''bar").should == "<p>foo <strong><em>bar</em></strong></p>\n" # totally missing
30
+ @parser.parse("foo '''''bar''").should == "<p>foo <strong><em>bar</em></strong></p>\n" # only <strong> missing
31
+ @parser.parse("foo '''''bar'''").should == "<p>foo <strong><em>bar</em></strong></p>\n" # only <em> missing
32
+ end
33
+
34
+ it 'should automatically close unclosed spans upon hitting newlines' do
35
+ @parser.parse("foo '''''bar\nbaz").should == "<p>foo <strong><em>bar</em></strong> baz</p>\n" # totally missing
36
+ @parser.parse("foo '''''bar''\nbaz").should == "<p>foo <strong><em>bar</em></strong> baz</p>\n" # only <strong> missing
37
+ @parser.parse("foo '''''bar'''\nbaz").should == "<p>foo <strong><em>bar</em></strong> baz</p>\n" # only <em> missing
38
+ end
39
+
40
+ it 'should allow combined "<strong><em>" tokens to interact with separate <strong> and <em> tokens' do
41
+ @parser.parse("foo '''bar ''baz'''''").should == "<p>foo <strong>bar <em>baz</em></strong></p>\n" # opened separately, closed by one combined token
42
+ @parser.parse("foo ''bar '''baz'''''").should == "<p>foo <em>bar <strong>baz</strong></em></p>\n" # same, with the opposite nesting order
43
+ @parser.parse("'''''foo'' bar''' baz").should == "<p><strong><em>foo</em> bar</strong> baz</p>\n" # opened by one combined token, closed separately
44
+ end
45
+
46
+ it 'should handle (illegal) interleaved spans' do
47
+ # ''''' means "<strong><em>", so on seeing ''' we try to close the <strong> first, which would make for illegal nesting
48
+ @parser.parse("'''''foo''' bar'' baz").should == "<p><strong><em>foo</em></strong> bar<em> baz</em></p>\n"
49
+
50
+ # note that if you really want ''''' to be parsed as "<em><strong>" you have to use whitespace to disambiguate
51
+ # for more examples see the "disambiguation" specs below
52
+ @parser.parse("'' '''foo''' bar'' baz").should == "<p><em> <strong>foo</strong> bar</em> baz</p>\n"
53
+ end
54
+
55
+ it 'should have no effect inside <pre> blocks' do
56
+ @parser.parse(" '''''foo'''''").should == "<pre>'''''foo'''''</pre>\n" # input begins with a space and is expected back as a <pre> block
57
+ end
58
+
59
+ it 'should have no effect inside <nowiki> spans' do
60
+ @parser.parse("<nowiki>'''''foo'''''</nowiki>").should == "<p>'''''foo'''''</p>\n"
61
+ end
62
+
63
+ describe 'disambiguation' do # ways of choosing which of <strong>/<em> ends up outermost
64
+ it 'should by default assume strong followed by em' do
65
+ @parser.parse("'''''foo'''''").should == "<p><strong><em>foo</em></strong></p>\n"
66
+ end
67
+
68
+ it 'should accept an empty nowiki span as a means of imposing em followed by strong' do
69
+ @parser.parse("''<nowiki></nowiki>'''foo'''''").should == "<p><em><strong>foo</strong></em></p>\n"
70
+ end
71
+
72
+ it 'should accept whitespace as a means of imposing em followed by strong' do
73
+ # when rendered in the browser the whitespace won't have any visual effect
74
+ @parser.parse("'' '''foo'''''").should == "<p><em> <strong>foo</strong></em></p>\n"
75
+ end
76
+
77
+ it 'should accept a literal <em> tag as a means of imposing em followed by strong' do
78
+ @parser.parse("<em>'''foo'''</em>").should == "<p><em><strong>foo</strong></em></p>\n"
79
+ end
80
+
81
+ it 'should accept a literal <strong> tag as a means of imposing em followed by strong' do
82
+ @parser.parse("''<strong>foo</strong>''").should == "<p><em><strong>foo</strong></em></p>\n"
83
+ end
84
+
85
+ it 'should accept literal <em> and <strong> tags as a means of imposing em followed by strong' do
86
+ @parser.parse("<em><strong>foo</strong></em>").should == "<p><em><strong>foo</strong></em></p>\n"
87
+ end
88
+ end
89
+ end
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2007-2008 Wincent Colaiuta
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
17
+ require 'wikitext'
18
+
19
+ describe Wikitext::Parser, 'parsing <strong> spans' do # covers both the ''' shorthand and literal <strong> tags
20
+ before do # assigns a new Wikitext::Parser to @parser for the examples below
21
+ @parser = Wikitext::Parser.new
22
+ end
23
+
24
+ describe 'marked up using wikitext shorthand' do
25
+ it 'should recognize paired <strong> tokens' do
26
+ @parser.parse("foo '''bar''' baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
27
+ end
28
+
29
+ it 'should automatically insert missing closing tags' do
30
+ @parser.parse("foo '''bar").should == "<p>foo <strong>bar</strong></p>\n"
31
+ end
32
+
33
+ it 'should automatically close unclosed spans upon hitting newlines' do
34
+ @parser.parse("foo '''bar\nbaz").should == "<p>foo <strong>bar</strong> baz</p>\n"
35
+ end
36
+
37
+ it 'should handle (illegal) interleaved spans' do
38
+ @parser.parse("foo '''bar ''inner''' baz''").should == "<p>foo <strong>bar <em>inner</em></strong> baz<em></em></p>\n" # <em> is closed along with <strong>; the trailing '' then yields an empty <em></em>
39
+ end
40
+
41
+ it 'should have no effect inside <pre> blocks' do
42
+ @parser.parse(" '''foo'''").should == "<pre>'''foo'''</pre>\n" # input begins with a space and is expected back as a <pre> block
43
+ end
44
+
45
+ it 'should have no effect inside <nowiki> spans' do
46
+ @parser.parse("<nowiki>'''foo'''</nowiki>").should == "<p>'''foo'''</p>\n"
47
+ end
48
+
49
+ it "should have no effect if a strong (<strong>) span is already open" do
50
+ @parser.parse("foo <strong>'''bar'''</strong> baz").should == "<p>foo <strong>'''bar'''</strong> baz</p>\n" # the apostrophes are expected to pass through literally
51
+ end
52
+ end
53
+
54
+ describe 'marked up using HTML tags' do
55
+ it 'should recognized paired <strong> tokens' do # NOTE(review): description says "recognized"; "recognize" was probably intended
56
+ @parser.parse("foo <strong>bar</strong> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
57
+ end
58
+
59
+ it 'should recognize <strong> tokens case-insensitively' do
60
+ @parser.parse("foo <STRong>bar</STRONG> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
61
+ @parser.parse("foo <strONG>bar</STRong> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
62
+ @parser.parse("foo <STRONG>bar</strONG> baz").should == "<p>foo <strong>bar</strong> baz</p>\n"
63
+ end
64
+
65
+ it 'should automatically insert missing closing tags' do
66
+ @parser.parse("foo <strong>bar").should == "<p>foo <strong>bar</strong></p>\n"
67
+ end
68
+
69
+ it 'should automatically close unclosed spans upon hitting newlines' do
70
+ @parser.parse("foo <strong>bar\nbaz").should == "<p>foo <strong>bar</strong> baz</p>\n"
71
+ end
72
+
73
+ it 'should handle (illegal) interleaved spans' do
74
+ expected = "<p>foo <strong>bar <em>inner</em></strong> baz&lt;/em&gt;</p>\n" # the stray literal </em> is expected back as escaped text
75
+ @parser.parse("foo <strong>bar <em>inner</strong> baz</em>").should == expected
76
+
77
+ expected = "<p>foo <strong>bar <em>inner</em></strong> baz<em></em></p>\n" # with the '' shorthand the stray closer yields an empty <em></em> instead
78
+ @parser.parse("foo <strong>bar ''inner</strong> baz''").should == expected
79
+ end
80
+
81
+ it 'should handle (illegal) nested <strong> spans' do
82
+ expected = "<p>foo <strong>bar &lt;strong&gt;inner</strong>&lt;/strong&gt; baz</p>\n" # the inner opening tag and the surplus closing tag are expected back as escaped text
83
+ @parser.parse('foo <strong>bar <strong>inner</strong></strong> baz').should == expected
84
+ end
85
+
86
+ it 'should have no effect inside <pre> blocks' do
87
+ @parser.parse(" <strong>foo</strong>").should == "<pre>&lt;strong&gt;foo&lt;/strong&gt;</pre>\n"
88
+ end
89
+
90
+ it 'should have no effect inside <nowiki> spans' do
91
+ @parser.parse("<nowiki><strong>foo</strong></nowiki>").should == "<p>&lt;strong&gt;foo&lt;/strong&gt;</p>\n"
92
+ end
93
+
94
+ it "should have no effect if an strong (''') span is already open" do # NOTE(review): description says "an strong"; "a strong" was probably intended
95
+ expected = "<p>foo <strong>&lt;strong&gt;bar&lt;/strong&gt;</strong> baz</p>\n"
96
+ @parser.parse("foo '''<strong>bar</strong>''' baz").should == expected
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,190 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2008 Wincent Colaiuta
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
17
+ require 'wikitext'
18
+
19
+ describe Wikitext::Parser::Token do # checks the shape of the table returned by Token.types
20
+ before do
21
+ @tokens = Wikitext::Parser::Token.types
22
+ end
23
+
24
+ it 'should report the available token types as a hash' do
25
+ @tokens.should be_kind_of(Hash)
26
+ end
27
+
28
+ it 'should report token names as symbols and values as numbers' do
29
+ @tokens.each do |k, v| # per the assertions below, the hash is keyed by Integer with Symbol names as values
30
+ v.should be_kind_of(Symbol)
31
+ k.should be_kind_of(Integer)
32
+ end
33
+ end
34
+
35
+ it 'should report unique token names and values' do
36
+ keys = @tokens.keys
37
+ keys.uniq.length.should == keys.length # uniq would shorten the array if any key were repeated
38
+ values = @tokens.values
39
+ values.uniq.length.should == values.length # likewise for the values
40
+ end
41
+ end
42
+
43
+ describe Wikitext::Parser, 'tokenizing' do # exercises Parser#tokenize and inspects the tokens it returns
44
+ before do # assigns a new Wikitext::Parser to @parser for the examples below
45
+ @parser = Wikitext::Parser.new
46
+ end
47
+
48
+ it 'should do nothing if passed nil' do
49
+ @parser.tokenize(nil).should == nil
50
+ end
51
+
52
+ it "should complain if passed an object that doesn't quack like a string" do
53
+ lambda { @parser.tokenize({}) }.should raise_error # an empty Hash stands in for the non-string argument
54
+ end
55
+
56
+ it 'should tokenize strings containing a single symbol' do
57
+ @tokens = @parser.tokenize('foo')
58
+ @tokens.length.should == 2 # the printable token plus the trailing end_of_file token
59
+ @tokens[0].token_type.should == :printable
60
+ @tokens[0].string_value.should == 'foo'
61
+ @tokens[1].token_type.should == :end_of_file
62
+ @tokens[1].string_value.should == ''
63
+ end
64
+
65
+ it 'should tokenize strings containing multiple symbols' do
66
+ @tokens = @parser.tokenize('foo http://example.com/')
67
+ @tokens.length.should == 4 # printable, space, uri, end_of_file
68
+ @tokens[0].token_type.should == :printable
69
+ @tokens[0].string_value.should == 'foo'
70
+ @tokens[1].token_type.should == :space
71
+ @tokens[1].string_value.should == ' '
72
+ @tokens[2].token_type.should == :uri
73
+ @tokens[2].string_value.should == 'http://example.com/'
74
+ @tokens[3].token_type.should == :end_of_file
75
+ @tokens[3].string_value.should == ''
76
+ end
77
+
78
+ it 'should tokenize runs of printable characters as as single symbol' do # NOTE(review): description has a doubled "as"; "as a single symbol" was probably intended
79
+ @tokens = @parser.tokenize('foo')
80
+ @tokens.length.should == 2
81
+ @tokens[0].token_type.should == :printable
82
+ @tokens[0].string_value.should == 'foo'
83
+ @tokens[0].line_start.should == 1
84
+ @tokens[0].column_start.should == 1
85
+ @tokens[0].line_stop.should == 1
86
+ @tokens[0].column_stop.should == 4 # three characters starting at column 1, so the stop column appears to be exclusive
87
+ @tokens[1].token_type.should == :end_of_file
88
+ @tokens[1].string_value.should == ''
89
+ end
90
+
91
+ it 'should tokenize END_OF_FILE tokens as zero-width tokens' do
92
+ @tokens = @parser.tokenize('')
93
+ @tokens.length.should == 1
94
+ @tokens[0].token_type.should == :end_of_file
95
+ @tokens[0].line_start.should == 1
96
+ @tokens[0].column_start.should == 1
97
+ @tokens[0].line_stop.should == 1
98
+ @tokens[0].column_stop.should == 1 # same as column_start: the token is zero-width
99
+ @tokens[0].string_value.should == ''
100
+ end
101
+
102
+ it 'should be able to tokenize strings containing "}"' do
103
+ # was a bug: we were throwing an exception "failed before finding a token" because our PRINTABLE rule omitted this code point
104
+ lambda { @tokens = @parser.tokenize('}') }.should_not raise_error
105
+ @tokens.length.should == 2
106
+ @tokens[0].token_type.should == :printable
107
+ @tokens[0].string_value.should == '}'
108
+ @tokens[0].line_start.should == 1
109
+ @tokens[0].column_start.should == 1
110
+ @tokens[0].line_stop.should == 1
111
+ @tokens[0].column_stop.should == 2
112
+ @tokens[1].token_type.should == :end_of_file
113
+ @tokens[1].string_value.should == ''
114
+ end
115
+
116
+ it 'should be able to tokenize the full range of printable ASCII' do
117
+ # see the previous example: we just want to make sure that our PRINTABLE rule is adequate
118
+ printable_ascii = (0x20..0x7e).to_a.pack('C*') # every byte from space (0x20) through tilde (0x7e)
119
+ lambda { @parser.tokenize(printable_ascii) }.should_not raise_error
120
+ end
121
+
122
+ it 'should be able to tokenize large blocks of text' do
123
+ large_block_of_text = dedent <<-END
124
+ paragraph
125
+ second line
126
+
127
+ new paragraph
128
+
129
+ = a heading =
130
+
131
+ > a blockquote
132
+ > second line of blockquote
133
+ >
134
+ > new paragraph within blockquote
135
+
136
+ == another heading ==
137
+
138
+ paragraph within ''multiple '''styles''''' and <tt>tt span</tt>
139
+
140
+ similar, but with '''styles in ''different'' order'''
141
+
142
+ again, a '''different ''order'''''
143
+
144
+ * list item 1
145
+ ** nested list item 1
146
+ ** nested list item 2
147
+ ** nested list item 3
148
+ * list item 2
149
+
150
+ // this is a code block
151
+ notice how it can contain ''markup''
152
+ which would '''otherwise''' have <tt>special</tt> meaning
153
+ although explicit entities &copy; are passed through unchanged
154
+
155
+ a normal paragraph again
156
+
157
+ This is where we show a link to an article on [[GCC]].
158
+ Related to that, [[GCC|a link]] to the same
159
+ article but with custom link text.
160
+
161
+ External links [http://example.com work too].
162
+ As well as autolinks as seen http://example.com/
163
+ here.
164
+
165
+ Look at how we handle bad syntax. [[This is an unterminated
166
+ link. And [http://example.com/ is another.
167
+
168
+ # this is an ordered list
169
+ # which continues
170
+ ## and has another ordered list
171
+ ## nested inside it
172
+ # and then falls back
173
+ #* and then nests another list
174
+ #* this time an unordered one
175
+ #** itself containing a nested list
176
+ #** which continues
177
+ #**# and finally nests yet another ordered list
178
+ #**# which continues
179
+ #* drops back quite a way
180
+ # and finally all the way
181
+ #****** and finishes with an invalid item
182
+
183
+ === heading with missing closing tag
184
+ * list
185
+ # new list
186
+ END
187
+ @tokens = @parser.tokenize(large_block_of_text)
188
+ @tokens.length.should > 0
189
+ end
190
+ end