wikitext 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2008 Wincent Colaiuta
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
17
+ require 'wikitext'
18
+
19
+ # this is a general-purpose file in which I'll add specs for former bugs to make sure that they don't regress
20
# Regression specs for Wikitext::Parser: each example pins the HTML produced for an input that was once
# reported as misbehaving. A fresh parser is created before every example.
# Example #1 parses a heading, a two-item unordered list and a paragraph containing an internal link,
# and compares the result with the expected HTML; both heredocs are passed through dedent first.
+ describe Wikitext::Parser, 'regressions' do
21
+ before do
22
+ @parser = Wikitext::Parser.new
23
+ end
24
+
25
+ it 'should correctly transform example #1' do
26
+ # turns out that this was never a bug in wikitext: it was a bug in the host application
27
+ input = dedent <<-END
28
+ = Leopard =
29
+
30
+ * punto 1
31
+ * punto 2
32
+
33
+ Y [[otro articulo]].
34
+ END
35
+ expected = dedent <<-END
36
+ <h1>Leopard</h1>
37
+ <ul>
38
+ <li>punto 1</li>
39
+ <li>punto 2</li>
40
+ </ul>
41
+ <p>Y <a href="/wiki/otro%20articulo">otro articulo</a>.</p>
42
+ END
43
+ @parser.parse(input).should == expected
44
+ end
45
+ end
@@ -0,0 +1,77 @@
1
+ # Copyright 2007-2008 Wincent Colaiuta
2
+ # This program is free software: you can redistribute it and/or modify
3
+ # it under the terms of the GNU General Public License as published by
4
+ # the Free Software Foundation, either version 3 of the License, or
5
+ # (at your option) any later version.
6
+ #
7
+ # This program is distributed in the hope that it will be useful,
8
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
9
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
+ # GNU General Public License for more details.
11
+ #
12
+ # You should have received a copy of the GNU General Public License
13
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
14
+
15
+ require 'pathname'
16
+ require 'rubygems'
17
+ require 'spec'
18
+
19
# Strips a fixed amount of leading indentation from every line of a multiline
# string, so that spec data (typically heredocs) can be indented for
# readability and still be compared exactly.
#
# Two call forms are accepted:
#
#   dedent(string)          # removes 6 leading spaces from each line
#   dedent(spaces, string)  # removes +spaces+ leading spaces from each line
#
# Returns a new String; the argument itself is not modified.
#
# Raises RuntimeError when the arguments match neither call form, or when any
# line starts with fewer than the requested number of spaces.
def dedent(spaces, string = nil)
  if spaces.kind_of? String
    raise 'When first argument is a String, second argument must be nil' unless string.nil?
    # default use: single String parameter, dedent by 6
    string, spaces = spaces, 6
  elsif spaces.kind_of? Integer
    raise 'When first argument is a number, second must be a String' unless string.kind_of?(String)
  else
    raise 'Invalid argument'
  end

  # String#each no longer exists as of Ruby 1.9; each_line is available on
  # both old and new interpreters and yields the same newline-terminated lines
  string.each_line do |line|
    raise "Underlength indent for line: #{line.inspect}" unless line =~ /\A {#{spaces}}/
  end
  # explicit parentheses: a bare "gsub /.../, ''" is parsed ambiguously
  string.gsub(/^ {#{spaces}}/, '')
end
44
+
45
# Makes the locally built extension loadable: the first time this file is
# evaluated, the "ext" directory one level above this file's directory is
# resolved, remembered as Wikitext::EXTDIR and appended to the load path.
module Wikitext
  unless const_defined? 'EXTDIR'
    # append the local "ext" directory to search path if not already present
    parent_dir = File.join(File.dirname(__FILE__), '..')
    EXTDIR = Pathname.new(File.join(parent_dir, 'ext')).realpath
    # compare resolved paths so that an equivalent entry spelled differently
    # still counts as present; entries that cannot be resolved are kept as-is
    resolved = $:.map do |entry|
      begin
        Pathname.new(entry).realpath
      rescue
        entry
      end
    end
    $:.push(EXTDIR) unless resolved.include?(EXTDIR)
  end
end # module Wikitext
54
+
55
# Fixture byte strings that are deliberately NOT well-formed UTF-8, one
# constant per kind of defect (truncated sequence, malformed continuation
# byte, overlong encoding, illegal lead byte). Each is built with
# Array#pack('C*'), i.e. one unsigned byte per array element.
#
# The constants are only defined once, so loading this file repeatedly does
# not trigger "already initialized constant" warnings.
module UTF8
  unless const_defined? 'Invalid'
    module Invalid
      TWO_BYTES_MISSING_SECOND_BYTE       = [0b11011111].pack('C*')
      TWO_BYTES_MALFORMED_SECOND_BYTE     = [0b11011111, 0b00001111].pack('C*') # should be 10......
      OVERLONG                            = [0b11000000, 0b10000000].pack('C*') # lead byte is 110..... but code point is <= 127
      OVERLONG_ALT                        = [0b11000001, 0b10000000].pack('C*') # lead byte is 110..... but code point is <= 127
      THREE_BYTES_MISSING_SECOND_BYTE     = [0b11100000].pack('C*')
      THREE_BYTES_MISSING_THIRD_BYTE      = [0b11100000, 0b10000000].pack('C*')
      THREE_BYTES_MALFORMED_SECOND_BYTE   = [0b11100000, 0b00001111, 0b10000000].pack('C*') # should be 10......
      THREE_BYTES_MALFORMED_THIRD_BYTE    = [0b11100000, 0b10000000, 0b00001111].pack('C*') # should be 10......
      FOUR_BYTES_MISSING_SECOND_BYTE      = [0b11110000].pack('C*')
      # The continuation bytes below must be binary literals (0b10111111 = 0xBF).
      # Written as hex (0x10111111), pack('C*') keeps only the low byte, 0x11,
      # which is not a continuation byte, so the fixture would not be the
      # truncated/illegal-lead sequence its name describes.
      FOUR_BYTES_MISSING_THIRD_BYTE       = [0b11110000, 0b10111111].pack('C*')
      FOUR_BYTES_MISSING_FOURTH_BYTE      = [0b11110000, 0b10111111, 0b10111111].pack('C*')
      # 0xF5, 0xF6 and 0xF7 all have the four-byte lead pattern (11110...) but
      # can never start a well-formed sequence; each variant covers one of them
      FOUR_BYTES_ILLEGAL_FIRST_BYTE       = [0b11110101, 0b10111111, 0b10111111, 0b10111111].pack('C*')
      FOUR_BYTES_ILLEGAL_FIRST_BYTE_ALT   = [0b11110110, 0b10111111, 0b10111111, 0b10111111].pack('C*')
      FOUR_BYTES_ILLEGAL_FIRST_BYTE_ALT2  = [0b11110111, 0b10111111, 0b10111111, 0b10111111].pack('C*')
      UNEXPECTED_BYTE                     = [0b11111000].pack('C*')
    end # module Invalid
  end
end # module UTF8
76
+
77
+ require 'wikitext'
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2007-2008 Wincent Colaiuta
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
17
+ require 'wikitext'
18
+
19
# Specs for the five-apostrophe shorthand ('''''), which opens a <strong> and
# an <em> span at once. Every example parses with a fresh Wikitext::Parser and
# compares the generated HTML with an exact string.
describe Wikitext::Parser, 'parsing combined <strong>/<em> spans' do
  before { @parser = Wikitext::Parser.new }

  it 'should recognize paired "<strong><em>" tokens' do
    html = @parser.parse("foo '''''bar''''' baz")
    html.should == "<p>foo <strong><em>bar</em></strong> baz</p>\n"
  end

  it 'should automatically insert missing closing tags' do
    expected = "<p>foo <strong><em>bar</em></strong></p>\n"
    [
      "foo '''''bar",     # totally missing
      "foo '''''bar''",   # only <strong> missing
      "foo '''''bar'''"   # only <em> missing
    ].each do |input|
      @parser.parse(input).should == expected
    end
  end

  it 'should automatically close unclosed spans upon hitting newlines' do
    expected = "<p>foo <strong><em>bar</em></strong> baz</p>\n"
    [
      "foo '''''bar\nbaz",     # totally missing
      "foo '''''bar''\nbaz",   # only <strong> missing
      "foo '''''bar'''\nbaz"   # only <em> missing
    ].each do |input|
      @parser.parse(input).should == expected
    end
  end

  it 'should allow combined "<strong><em>" tokens to interact with separate <strong> and <em> tokens' do
    [
      ["foo '''bar ''baz'''''", "<p>foo <strong>bar <em>baz</em></strong></p>\n"],
      ["foo ''bar '''baz'''''", "<p>foo <em>bar <strong>baz</strong></em></p>\n"],
      ["'''''foo'' bar''' baz", "<p><strong><em>foo</em> bar</strong> baz</p>\n"]
    ].each do |input, expected|
      @parser.parse(input).should == expected
    end
  end

  it 'should handle (illegal) interleaved spans' do
    # ''''' is read as "<strong><em>", so a following ''' closes the <strong> first and the nesting becomes illegal
    interleaved = @parser.parse("'''''foo''' bar'' baz")
    interleaved.should == "<p><strong><em>foo</em></strong> bar<em> baz</em></p>\n"

    # to get "<em><strong>" out of ''''' the two tokens must be separated by whitespace
    # (further examples are in the "disambiguation" group below)
    separated = @parser.parse("'' '''foo''' bar'' baz")
    separated.should == "<p><em> <strong>foo</strong> bar</em> baz</p>\n"
  end

  it 'should have no effect inside <pre> blocks' do
    html = @parser.parse(" '''''foo'''''")
    html.should == "<pre>'''''foo'''''</pre>\n"
  end

  it 'should have no effect inside <nowiki> spans' do
    html = @parser.parse("<nowiki>'''''foo'''''</nowiki>")
    html.should == "<p>'''''foo'''''</p>\n"
  end

  describe 'disambiguation' do
    it 'should by default assume strong followed by em' do
      html = @parser.parse("'''''foo'''''")
      html.should == "<p><strong><em>foo</em></strong></p>\n"
    end

    it 'should accept an empty nowiki span as a means of imposing em followed by strong' do
      html = @parser.parse("''<nowiki></nowiki>'''foo'''''")
      html.should == "<p><em><strong>foo</strong></em></p>\n"
    end

    it 'should accept whitespace as a means of imposing em followed by strong' do
      # the extra whitespace has no visible effect once a browser renders the output
      html = @parser.parse("'' '''foo'''''")
      html.should == "<p><em> <strong>foo</strong></em></p>\n"
    end

    it 'should accept a literal <em> tag as a means of imposing em followed by strong' do
      html = @parser.parse("<em>'''foo'''</em>")
      html.should == "<p><em><strong>foo</strong></em></p>\n"
    end

    it 'should accept a literal <strong> tag as a means of imposing em followed by strong' do
      html = @parser.parse("''<strong>foo</strong>''")
      html.should == "<p><em><strong>foo</strong></em></p>\n"
    end

    it 'should accept literal <em> and <strong> tags as a means of imposing em followed by strong' do
      html = @parser.parse("<em><strong>foo</strong></em>")
      html.should == "<p><em><strong>foo</strong></em></p>\n"
    end
  end
end
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2007-2008 Wincent Colaiuta
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
17
+ require 'wikitext'
18
+
19
# Specs for <strong> spans, written either with the wikitext shorthand (''')
# or with literal HTML tags. Every example parses with a fresh
# Wikitext::Parser and compares the generated HTML with an exact string.
describe Wikitext::Parser, 'parsing <strong> spans' do
  before { @parser = Wikitext::Parser.new }

  describe 'marked up using wikitext shorthand' do
    it 'should recognize paired <strong> tokens' do
      html = @parser.parse("foo '''bar''' baz")
      html.should == "<p>foo <strong>bar</strong> baz</p>\n"
    end

    it 'should automatically insert missing closing tags' do
      html = @parser.parse("foo '''bar")
      html.should == "<p>foo <strong>bar</strong></p>\n"
    end

    it 'should automatically close unclosed spans upon hitting newlines' do
      html = @parser.parse("foo '''bar\nbaz")
      html.should == "<p>foo <strong>bar</strong> baz</p>\n"
    end

    it 'should handle (illegal) interleaved spans' do
      html = @parser.parse("foo '''bar ''inner''' baz''")
      html.should == "<p>foo <strong>bar <em>inner</em></strong> baz<em></em></p>\n"
    end

    it 'should have no effect inside <pre> blocks' do
      html = @parser.parse(" '''foo'''")
      html.should == "<pre>'''foo'''</pre>\n"
    end

    it 'should have no effect inside <nowiki> spans' do
      html = @parser.parse("<nowiki>'''foo'''</nowiki>")
      html.should == "<p>'''foo'''</p>\n"
    end

    it "should have no effect if a strong (<strong>) span is already open" do
      html = @parser.parse("foo <strong>'''bar'''</strong> baz")
      html.should == "<p>foo <strong>'''bar'''</strong> baz</p>\n"
    end
  end

  describe 'marked up using HTML tags' do
    it 'should recognized paired <strong> tokens' do
      html = @parser.parse("foo <strong>bar</strong> baz")
      html.should == "<p>foo <strong>bar</strong> baz</p>\n"
    end

    it 'should recognize <strong> tokens case-insensitively' do
      expected = "<p>foo <strong>bar</strong> baz</p>\n"
      [
        "foo <STRong>bar</STRONG> baz",
        "foo <strONG>bar</STRong> baz",
        "foo <STRONG>bar</strONG> baz"
      ].each do |input|
        @parser.parse(input).should == expected
      end
    end

    it 'should automatically insert missing closing tags' do
      html = @parser.parse("foo <strong>bar")
      html.should == "<p>foo <strong>bar</strong></p>\n"
    end

    it 'should automatically close unclosed spans upon hitting newlines' do
      html = @parser.parse("foo <strong>bar\nbaz")
      html.should == "<p>foo <strong>bar</strong> baz</p>\n"
    end

    it 'should handle (illegal) interleaved spans' do
      [
        ["foo <strong>bar <em>inner</strong> baz</em>", "<p>foo <strong>bar <em>inner</em></strong> baz&lt;/em&gt;</p>\n"],
        ["foo <strong>bar ''inner</strong> baz''",      "<p>foo <strong>bar <em>inner</em></strong> baz<em></em></p>\n"]
      ].each do |input, expected|
        @parser.parse(input).should == expected
      end
    end

    it 'should handle (illegal) nested <strong> spans' do
      html = @parser.parse('foo <strong>bar <strong>inner</strong></strong> baz')
      html.should == "<p>foo <strong>bar &lt;strong&gt;inner</strong>&lt;/strong&gt; baz</p>\n"
    end

    it 'should have no effect inside <pre> blocks' do
      html = @parser.parse(" <strong>foo</strong>")
      html.should == "<pre>&lt;strong&gt;foo&lt;/strong&gt;</pre>\n"
    end

    it 'should have no effect inside <nowiki> spans' do
      html = @parser.parse("<nowiki><strong>foo</strong></nowiki>")
      html.should == "<p>&lt;strong&gt;foo&lt;/strong&gt;</p>\n"
    end

    it "should have no effect if an strong (''') span is already open" do
      html = @parser.parse("foo '''<strong>bar</strong>''' baz")
      html.should == "<p>foo <strong>&lt;strong&gt;bar&lt;/strong&gt;</strong> baz</p>\n"
    end
  end
end
@@ -0,0 +1,190 @@
1
+ #!/usr/bin/env ruby
2
+ # Copyright 2008 Wincent Colaiuta
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 3 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+
16
+ require File.join(File.dirname(__FILE__), 'spec_helper.rb')
17
+ require 'wikitext'
18
+
19
# Specs for the table returned by Wikitext::Parser::Token.types: it must be a
# Hash whose keys are Integers and whose values are Symbols, with no duplicate
# keys and no duplicate values.
describe Wikitext::Parser::Token do
  before { @tokens = Wikitext::Parser::Token.types }

  it 'should report the available token types as a hash' do
    @tokens.should be_kind_of(Hash)
  end

  it 'should report token names as symbols and values as numbers' do
    @tokens.each do |number, name|
      name.should be_kind_of(Symbol)
      number.should be_kind_of(Integer)
    end
  end

  it 'should report unique token names and values' do
    # check the keys first, then the values
    [:keys, :values].each do |accessor|
      entries = @tokens.send(accessor)
      entries.uniq.length.should == entries.length
    end
  end
end
42
+
43
# Specs for Wikitext::Parser#tokenize, run against a fresh parser per example. They cover:
# nil input (returns nil), a non-string argument (raises), the token_type/string_value of each token
# including the trailing :end_of_file token, line/column positions, the full printable ASCII range,
# and a large mixed-markup sample that only has to produce a non-empty token list.
# Per the expectations below, column_stop is one past the last character of a token
# ('foo' spans columns 1 to 4, '}' spans 1 to 2, and the empty end-of-file token spans 1 to 1).
+ describe Wikitext::Parser, 'tokenizing' do
44
+ before do
45
+ @parser = Wikitext::Parser.new
46
+ end
47
+
48
+ it 'should do nothing if passed nil' do
49
+ @parser.tokenize(nil).should == nil
50
+ end
51
+
52
+ it "should complain if passed an object that doesn't quack like a string" do
53
+ lambda { @parser.tokenize({}) }.should raise_error
54
+ end
55
+
56
+ it 'should tokenize strings containing a single symbol' do
57
+ @tokens = @parser.tokenize('foo')
58
+ @tokens.length.should == 2
59
+ @tokens[0].token_type.should == :printable
60
+ @tokens[0].string_value.should == 'foo'
61
+ @tokens[1].token_type.should == :end_of_file
62
+ @tokens[1].string_value.should == ''
63
+ end
64
+
65
+ it 'should tokenize strings containing multiple symbols' do
66
+ @tokens = @parser.tokenize('foo http://example.com/')
67
+ @tokens.length.should == 4
68
+ @tokens[0].token_type.should == :printable
69
+ @tokens[0].string_value.should == 'foo'
70
+ @tokens[1].token_type.should == :space
71
+ @tokens[1].string_value.should == ' '
72
+ @tokens[2].token_type.should == :uri
73
+ @tokens[2].string_value.should == 'http://example.com/'
74
+ @tokens[3].token_type.should == :end_of_file
75
+ @tokens[3].string_value.should == ''
76
+ end
77
+
78
+ it 'should tokenize runs of printable characters as as single symbol' do
79
+ @tokens = @parser.tokenize('foo')
80
+ @tokens.length.should == 2
81
+ @tokens[0].token_type.should == :printable
82
+ @tokens[0].string_value.should == 'foo'
83
+ @tokens[0].line_start.should == 1
84
+ @tokens[0].column_start.should == 1
85
+ @tokens[0].line_stop.should == 1
86
+ @tokens[0].column_stop.should == 4
87
+ @tokens[1].token_type.should == :end_of_file
88
+ @tokens[1].string_value.should == ''
89
+ end
90
+
91
+ it 'should tokenize END_OF_FILE tokens as zero-width tokens' do
92
+ @tokens = @parser.tokenize('')
93
+ @tokens.length.should == 1
94
+ @tokens[0].token_type.should == :end_of_file
95
+ @tokens[0].line_start.should == 1
96
+ @tokens[0].column_start.should == 1
97
+ @tokens[0].line_stop.should == 1
98
+ @tokens[0].column_stop.should == 1
99
+ @tokens[0].string_value.should == ''
100
+ end
101
+
102
+ it 'should be able to tokenize strings containing "}"' do
103
+ # was a bug: we were throwing an exception "failed before finding a token" because our PRINTABLE rule omitted this code point
104
+ lambda { @tokens = @parser.tokenize('}') }.should_not raise_error
105
+ @tokens.length.should == 2
106
+ @tokens[0].token_type.should == :printable
107
+ @tokens[0].string_value.should == '}'
108
+ @tokens[0].line_start.should == 1
109
+ @tokens[0].column_start.should == 1
110
+ @tokens[0].line_stop.should == 1
111
+ @tokens[0].column_stop.should == 2
112
+ @tokens[1].token_type.should == :end_of_file
113
+ @tokens[1].string_value.should == ''
114
+ end
115
+
116
+ it 'should be able to tokenize the full range of printable ASCII' do
117
+ # see the previous example: we just want to make sure that our PRINTABLE rule is adequate
118
+ printable_ascii = (0x20..0x7e).to_a.pack('C*')
119
+ lambda { @parser.tokenize(printable_ascii) }.should_not raise_error
120
+ end
121
+
122
+ it 'should be able to tokenize large blocks of text' do
123
+ large_block_of_text = dedent <<-END
124
+ paragraph
125
+ second line
126
+
127
+ new paragraph
128
+
129
+ = a heading =
130
+
131
+ > a blockquote
132
+ > second line of blockquote
133
+ >
134
+ > new paragraph within blockquote
135
+
136
+ == another heading ==
137
+
138
+ paragraph within ''multiple '''styles''''' and <tt>tt span</tt>
139
+
140
+ similar, but with '''styles in ''different'' order'''
141
+
142
+ again, a '''different ''order'''''
143
+
144
+ * list item 1
145
+ ** nested list item 1
146
+ ** nested list item 2
147
+ ** nested list item 3
148
+ * list item 2
149
+
150
+ // this is a code block
151
+ notice how it can contain ''markup''
152
+ which would '''otherwise''' have <tt>special</tt> meaning
153
+ although explicit entities &copy; are passed through unchanged
154
+
155
+ a normal paragraph again
156
+
157
+ This is where we show a link to an article on [[GCC]].
158
+ Related to that, [[GCC|a link]] to the same
159
+ article but with custom link text.
160
+
161
+ External links [http://example.com work too].
162
+ As well as autolinks as seen http://example.com/
163
+ here.
164
+
165
+ Look at how we handle bad syntax. [[This is an unterminated
166
+ link. And [http://example.com/ is another.
167
+
168
+ # this is an ordered list
169
+ # which continues
170
+ ## and has another ordered list
171
+ ## nested inside it
172
+ # and then falls back
173
+ #* and then nests another list
174
+ #* this time an unordered one
175
+ #** itself containing a nested list
176
+ #** which continues
177
+ #**# and finally nests yet another ordered list
178
+ #**# which continues
179
+ #* drops back quite a way
180
+ # and finally all the way
181
+ #****** and finishes with an invalid item
182
+
183
+ === heading with missing closing tag
184
+ * list
185
+ # new list
186
+ END
187
+ @tokens = @parser.tokenize(large_block_of_text)
188
+ @tokens.length.should > 0
189
+ end
190
+ end