mediacloth 0.0.3 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. data/README.md +36 -0
  2. data/lib/mediacloth/mediawikiast.rb +58 -1
  3. data/lib/mediacloth/mediawikihtmlgenerator.rb +229 -73
  4. data/lib/mediacloth/mediawikilexer.rb +1030 -656
  5. data/lib/mediacloth/mediawikilinkhandler.rb +89 -0
  6. data/lib/mediacloth/mediawikiparams.rb +1 -10
  7. data/lib/mediacloth/mediawikiparser.rb +939 -409
  8. data/lib/mediacloth/mediawikiparser.tab.rb +1357 -0
  9. data/lib/mediacloth/mediawikiparser.y +256 -52
  10. data/lib/mediacloth/mediawikisignedwikigenerator.rb +42 -0
  11. data/lib/mediacloth/mediawikitemplatehandler.rb +8 -0
  12. data/lib/mediacloth/mediawikiwalker.rb +72 -1
  13. data/lib/mediacloth.rb +33 -10
  14. data/test/data/ast1 +68 -0
  15. data/test/data/ast10 +196 -0
  16. data/test/data/ast11 +34 -0
  17. data/test/data/ast12 +39 -0
  18. data/test/data/ast13 +25 -0
  19. data/test/data/ast14 +13 -0
  20. data/test/data/ast15 +25 -0
  21. data/test/data/ast16 +17 -0
  22. data/test/data/ast17 +9 -0
  23. data/test/data/ast18 +21 -0
  24. data/test/data/ast19 +32 -0
  25. data/test/data/ast2 +4 -0
  26. data/test/data/ast20 +10 -0
  27. data/test/data/ast21 +27 -0
  28. data/test/data/ast22 +22 -0
  29. data/test/data/ast23 +5 -0
  30. data/test/data/ast3 +6 -0
  31. data/test/data/ast4 +122 -0
  32. data/test/data/ast5 +122 -0
  33. data/test/data/ast6 +22 -0
  34. data/test/data/ast7 +143 -0
  35. data/test/data/ast8 +3 -0
  36. data/test/data/ast9 +11 -0
  37. data/test/data/html1 +33 -5
  38. data/test/data/html10 +31 -27
  39. data/test/data/html11 +19 -0
  40. data/test/data/html12 +32 -0
  41. data/test/data/html13 +29 -0
  42. data/test/data/html14 +4 -0
  43. data/test/data/html15 +29 -0
  44. data/test/data/html16 +28 -0
  45. data/test/data/html17 +10 -0
  46. data/test/data/html18 +8 -0
  47. data/test/data/html19 +27 -0
  48. data/test/data/html2 +1 -1
  49. data/test/data/html20 +7 -0
  50. data/test/data/html21 +5 -0
  51. data/test/data/html22 +24 -0
  52. data/test/data/html23 +7 -0
  53. data/test/data/html3 +1 -1
  54. data/test/data/html4 +60 -11
  55. data/test/data/html5 +45 -6
  56. data/test/data/html6 +5 -5
  57. data/test/data/html7 +59 -1
  58. data/test/data/html8 +1 -1
  59. data/test/data/html9 +10 -2
  60. data/test/data/input1 +4 -0
  61. data/test/data/input11 +19 -0
  62. data/test/data/input12 +32 -0
  63. data/test/data/input13 +10 -0
  64. data/test/data/input14 +8 -0
  65. data/test/data/input15 +10 -0
  66. data/test/data/input16 +28 -0
  67. data/test/data/input17 +10 -0
  68. data/test/data/input18 +16 -0
  69. data/test/data/input19 +29 -0
  70. data/test/data/input20 +8 -0
  71. data/test/data/input21 +18 -0
  72. data/test/data/input22 +20 -0
  73. data/test/data/input23 +8 -0
  74. data/test/data/input4 +13 -1
  75. data/test/data/input5 +45 -4
  76. data/test/data/input7 +25 -1
  77. data/test/data/lex1 +17 -18
  78. data/test/data/lex10 +57 -87
  79. data/test/data/lex11 +18 -0
  80. data/test/data/lex12 +32 -0
  81. data/test/data/lex13 +3 -0
  82. data/test/data/lex14 +1 -0
  83. data/test/data/lex15 +3 -0
  84. data/test/data/lex16 +27 -0
  85. data/test/data/lex17 +9 -0
  86. data/test/data/lex18 +4 -0
  87. data/test/data/lex19 +27 -0
  88. data/test/data/lex2 +2 -2
  89. data/test/data/lex20 +7 -0
  90. data/test/data/lex21 +4 -0
  91. data/test/data/lex22 +3 -0
  92. data/test/data/lex23 +7 -0
  93. data/test/data/lex3 +1 -1
  94. data/test/data/lex4 +35 -29
  95. data/test/data/lex5 +57 -18
  96. data/test/data/lex6 +7 -7
  97. data/test/data/lex7 +42 -18
  98. data/test/data/lex8 +1 -1
  99. data/test/data/lex9 +6 -6
  100. data/test/dataproducers/ast.rb +24 -0
  101. data/test/dataproducers/html.rb +11 -12
  102. data/test/dataproducers/lex.rb +9 -4
  103. data/test/debugwalker.rb +25 -11
  104. data/test/htmlgenerator.rb +170 -13
  105. data/test/lexer.rb +626 -83
  106. data/test/linkhandler.rb +39 -0
  107. data/test/parser.rb +176 -9
  108. data/test/signedwikigenerator.rb +113 -0
  109. metadata +158 -79
  110. data/README +0 -37
  111. data/lib/mediacloth/mediawikilexer.rb~ +0 -491
  112. data/lib/mediacloth/mediawikiparser.y~ +0 -210
  113. data/test/data/result1 +0 -48
  114. data/test/dataproducers/html.rb~ +0 -24
  115. data/test/dataproducers/lex.rb~ +0 -15
data/test/lexer.rb CHANGED
@@ -4,100 +4,643 @@ require 'testhelper'
4
4
 
5
5
  class Lexer_Test < Test::Unit::TestCase
6
6
 
7
+ class << self
7
8
  include TestHelper
9
+ end
8
10
 
9
- def test_standard_formatted_input
10
- test_files("lex") { |input,result,resultname|
11
- lexer = MediaWikiLexer.new
12
- tokens = lexer.tokenize(input)
13
- assert_equal(result, tokens.to_s, "Mismatch in #{resultname}")
14
- }
15
- end
16
-
17
- def test_internet_formatted_input
18
- test_files("lex") { |input,result,resultname|
19
- lexer = MediaWikiLexer.new
20
- tokens = lexer.tokenize(input.gsub("\n", "\r\n"))
21
- assert_equal(result.gsub("\n", "\r\n"), tokens.to_s, "Mismatch in #{resultname}")
22
- }
23
- end
11
+ test_files("lex") do |input,result,resultname|
12
+ resultname =~ /([0-9]+)$/
13
+ define_method("test_win_unix_le_formatted_input_#{$1}") do
14
+ lexer_unix = MediaWikiLexer.new
15
+ tokens_unix = lexer_unix.tokenize(input)
16
+ lexer_win = MediaWikiLexer.new
17
+ tokens_win = lexer_win.tokenize(input.gsub("\n", "\r\n"))
24
18
 
25
- def test_paragraphs
26
- assert_equal(lex("text\n\ntext"),
27
- [[:PARA_START, ""], [:TEXT, "text"], [:PARA_END, "\n\n"],
28
- [:PARA_START, ""], [:TEXT, "text"], [:PARA_END, ""], [false,false]])
29
- assert_equal(lex("text\r\n\r\ntext"),
30
- [[:PARA_START, ""], [:TEXT, "text"], [:PARA_END, "\r\n\r\n"],
31
- [:PARA_START, ""], [:TEXT, "text"], [:PARA_END, ""], [false,false]])
32
- assert_equal(lex("Before\n\n=Headline="),
33
- [[:PARA_START, ""], [:TEXT, "Before"], [:PARA_END, "\n\n"],
34
- [:SECTION_START, "="], [:TEXT, "Headline"], [:SECTION_END, "="], [false,false]])
35
- assert_equal(lex("Before\r\n\r\n=Headline="),
36
- [[:PARA_START, ""], [:TEXT, "Before"], [:PARA_END, "\r\n\r\n"],
37
- [:SECTION_START, "="], [:TEXT, "Headline"], [:SECTION_END, "="], [false,false]])
38
- end
39
-
40
- def test_empty
41
- assert_equal(lex(""), [[false,false]])
42
- end
19
+ tokens_unix_check = []
20
+ tokens_unix.each do
21
+ |token|
22
+ tokens_unix_check << token[0,1]
23
+ end
43
24
 
44
- def test_preformatted
45
- #assure preformatted text works as expected at the start of the text
46
- assert_equal(lex(" Foo\n"), [[:PRE, "Foo\n"], [false, false]])
47
- assert_equal(lex(" Foo\r\n"), [[:PRE, "Foo\r\n"], [false, false]])
48
- assert_equal(lex(" Foo"), [[:PRE, "Foo"], [false, false]])
25
+ tokens_win_check = []
26
+ tokens_win.each do
27
+ |token|
28
+ tokens_win_check << token[0,1]
29
+ end
30
+ assert_equal(tokens_unix_check, tokens_win_check, "Mismatch in #{resultname}")
49
31
  end
32
+ end
50
33
 
51
- def test_hline
52
- #assure that at the start of the text hline still works
53
- assert_equal(lex("----"), [[:HLINE, "----"], [false, false]])
54
- assert_equal(lex("\n----"), [[:HLINE, "----"], [false, false]])
55
- assert_equal(lex("\r\n----"), [[:HLINE, "----"], [false, false]])
34
+ test_files("lex") do |input,result,resultname|
35
+ resultname =~ /([0-9]+)$/
36
+ define_method("test_internet_formatted_input_#{$1}") do
37
+ lexer = MediaWikiLexer.new
38
+ tokens = lexer.tokenize(input)
39
+ assert_equal(result, tokens.to_s, "Mismatch in #{resultname}")
56
40
  end
57
-
58
- def test_inline_links
59
- #assure that links in-line work
60
- assert_equal(lex("http://example.com"), [[:PARA_START, ""], [:LINKSTART, ""], [:TEXT, "http://example.com"], [:LINKEND, "]"], [:PARA_END, ""], [false, false]])
61
- assert_equal(lex("http://example.com\n"), [[:PARA_START, ""], [:LINKSTART, ""], [:TEXT, "http://example.com"], [:LINKEND, "]"], [:PARA_END, ""], [false, false]])
62
- #assert_equal(lex("http://example.com''italic''"), [[:PARA_START, ""], [:LINKSTART, ""], [:TEXT, "http://example.com"], [:LINKEND, "]"], [:PARA_END, ""], [false, false]])
63
- end
41
+ end
64
42
 
65
- def test_ending_text_token
66
- #check for a problem when the last token is TEXT and it's not included
67
- assert_equal(lex("\n----\nfoo\n"),
68
- [[:HLINE, "----"], [:PARA_START, ""],
69
- [:TEXT, "\nfoo\n"], [:PARA_END, ""], [false, false]])
70
- assert_equal(lex("\r\n----\r\nfoo\r\n"),
71
- [[:HLINE, "----"], [:PARA_START, ""],
72
- [:TEXT, "\r\nfoo\r\n"], [:PARA_END, ""], [false, false]])
73
- assert_equal(lex("\n----\nfoo\n Hehe"),
74
- [[:HLINE, "----"], [:PARA_START, ""], [:TEXT, "\nfoo\n"],
75
- [:PARA_END, ""], [:PRE, "Hehe"], [false, false]])
76
- assert_equal(lex("\r\n----\r\nfoo\r\n Hehe"),
77
- [[:HLINE, "----"], [:PARA_START, ""], [:TEXT, "\r\nfoo\r\n"],
78
- [:PARA_END, ""], [:PRE, "Hehe"], [false, false]])
79
- end
43
+ def test_empty
44
+ assert_equal([[false,false,0,0]], lex(""))
45
+ end
46
+
47
+ def test_paragraphs
48
+ assert_equal(
49
+ [[:PARA_START, "", 0, 0], [:TEXT, "text", 0, 4], [:PARA_END, "", 4, 0], [false,false, 4, 0]],
50
+ lex("text"))
51
+ assert_equal(
52
+ [[:PARA_START, "", 0, 0], [:TEXT, "text\ntext", 0, 9], [:PARA_END, "", 9, 0], [false,false, 9, 0]],
53
+ lex("text\ntext"))
54
+ assert_equal(
55
+ [[:PARA_START, "", 0, 0], [:TEXT, "text\r\ntext", 0, 10], [:PARA_END, "", 10, 0], [false,false, 10, 0]],
56
+ lex("text\r\ntext"))
57
+ assert_equal(
58
+ [[:PARA_START, "", 0, 0], [:TEXT, "text\n\n", 0, 6], [:PARA_END, "", 6, 0],
59
+ [:PARA_START, "", 6, 0], [:TEXT, "text", 6, 4], [:PARA_END, "", 10, 0], [false,false, 10, 0]],
60
+ lex("text\n\ntext"))
61
+ assert_equal(
62
+ [[:PARA_START, "", 0, 0], [:TEXT, "text\r\n\r\n", 0, 8], [:PARA_END, "", 8, 0],
63
+ [:PARA_START, "", 8, 0], [:TEXT, "text", 8, 4], [:PARA_END, "", 12, 0], [false,false, 12, 0]],
64
+ lex("text\r\n\r\ntext"))
65
+ assert_equal(
66
+ [[:PARA_START, "", 0, 0], [:TEXT, "text\n\n", 0, 6], [:PARA_END, "", 6, 0],
67
+ [:PARA_START, "", 6, 0], [:TEXT, "\ntext", 6, 5], [:PARA_END, "", 11, 0], [false,false, 11, 0]],
68
+ lex("text\n\n\ntext"))
69
+ assert_equal(
70
+ [[:PARA_START, "", 0, 0], [:TEXT, "text\n\n", 0, 6], [:PARA_END, "", 6, 0], [:PARA_START, "", 6, 0],
71
+ [:TEXT, "\n\n", 6, 2], [:PARA_END, "", 8, 0], [:PARA_START, "", 8, 0], [:TEXT, "text", 8, 4], [:PARA_END, "", 12, 0],
72
+ [false,false, 12, 0]],
73
+ lex("text\n\n\n\ntext"))
74
+ assert_equal(
75
+ [[:PARA_START, "", 0, 0], [:TEXT, "text\n", 0, 5], [:PARA_END, "", 5, 0],
76
+ [:SECTION_START, "=", 5, 1], [:TEXT, "heading", 6, 7], [:SECTION_END, "=", 13, 1], [false,false, 14, 0]],
77
+ lex("text\n=heading="))
78
+ assert_equal(
79
+ [[:PARA_START, "", 0, 0], [:TEXT, "text\r\n", 0, 6], [:PARA_END, "", 6, 0],
80
+ [:SECTION_START, "=", 6, 1], [:TEXT, "heading", 7, 7], [:SECTION_END, "=", 14, 1], [false,false, 15, 0]],
81
+ lex("text\r\n=heading="))
82
+ assert_equal(
83
+ [[:SECTION_START, "=", 0, 1], [:TEXT, "heading", 1, 7], [:SECTION_END, "=", 8, 0],
84
+ [:PARA_START, "", 8, 2], [:TEXT, "text", 10, 4], [:PARA_END, "", 14, 0], [false,false, 14, 0]],
85
+ lex("=heading=\ntext"))
86
+ assert_equal(
87
+ [[:SECTION_START, "=", 0, 1], [:TEXT, "heading", 1, 7], [:SECTION_END, "=", 8, 0],
88
+ [:PARA_START, "", 8, 3], [:TEXT, "text", 11, 4], [:PARA_END, "", 15, 0], [false,false, 15, 0]],
89
+ lex("=heading=\r\ntext"))
90
+ assert_equal(
91
+ [[:PARA_START, "", 0, 0], [:TEXT, "text\n\n", 0, 6], [:PARA_END, "", 6, 0],
92
+ [:SECTION_START, "=", 6, 1], [:TEXT, "heading", 7, 7], [:SECTION_END, "=", 14, 1], [false,false, 15, 0]],
93
+ lex("text\n\n=heading="))
94
+ assert_equal(
95
+ [[:PARA_START, "", 0, 0], [:TEXT, "text\r\n\r\n", 0, 8], [:PARA_END, "", 8, 0],
96
+ [:SECTION_START, "=", 8, 1], [:TEXT, "heading", 9, 7], [:SECTION_END, "=", 16, 1], [false,false, 17, 0]],
97
+ lex("text\r\n\r\n=heading="))
98
+ end
99
+
100
+ def test_formatting
101
+ assert_equal(
102
+ [[:PARA_START, "", 0, 0], [:ITALIC_START, "''", 0, 2], [:TEXT, "italic", 2, 6], [:ITALIC_END, "''", 8, 2],
103
+ [:PARA_END, "", 10, 0], [false,false, 10, 0]],
104
+ lex("''italic''"))
105
+ assert_equal(
106
+ [[:PARA_START, "", 0, 0], [:BOLD_START, "'''", 0, 3], [:TEXT, "bold", 3, 4], [:BOLD_END, "'''", 7, 3],
107
+ [:PARA_END, "", 10, 0], [false,false, 10, 0]],
108
+ lex("'''bold'''"))
109
+ assert_equal(
110
+ [[:PARA_START, "", 0, 0], [:ITALIC_START, "''", 0, 2], [:TEXT, "italic", 2, 6], [:BOLD_START, "'''", 8, 3],
111
+ [:TEXT, "bold", 11, 4], [:BOLD_END, "'''", 15, 3], [:TEXT, "italic", 18, 6], [:ITALIC_END, "''", 24, 2],
112
+ [:PARA_END, "", 26, 0], [false,false, 26, 0]],
113
+ lex("''italic'''bold'''italic''"))
114
+ assert_equal(
115
+ [[:PARA_START, "", 0, 0], [:ITALIC_START, "''", 0, 2], [:BOLD_START, "'''", 2, 3],
116
+ [:TEXT, "bolditalic", 5, 10], [:BOLD_END, "'''", 15, 3], [:ITALIC_END, "''", 18, 2],
117
+ [:PARA_END, "", 20, 0], [false,false, 20, 0]],
118
+ lex("'''''bolditalic'''''"))
119
+ assert_equal(
120
+ [[:PARA_START, "", 0, 0], [:ITALIC_START, "''", 0, 2], [:TEXT, "italic\n\n", 2, 8], [:ITALIC_END, "", 10, 0],
121
+ [:PARA_END, "", 10, 0], [false,false, 10, 0]],
122
+ lex("''italic\n\n"))
123
+ end
124
+
125
+ def test_headings
126
+ assert_equal(
127
+ [[:SECTION_START, "=", 0, 1], [:TEXT, "heading", 1, 7], [:SECTION_END, "=", 8, 1], [false,false, 9, 0]],
128
+ lex("=heading="))
129
+ assert_equal(
130
+ [[:SECTION_START, "==", 0, 2], [:TEXT, "heading", 2, 7], [:SECTION_END, "==", 9, 2], [false,false, 11, 0]],
131
+ lex("==heading=="))
132
+ assert_equal(
133
+ [[:SECTION_START, "==", 0, 2], [:TEXT, " 1 <= 2 ", 2, 8], [:SECTION_END, "==", 10, 2], [false,false, 12, 0]],
134
+ lex("== 1 <= 2 =="))
135
+ assert_equal(
136
+ [[:SECTION_START, "==", 0, 2], [:TEXT, "heading", 2, 7], [:SECTION_END, "==", 9, 0],
137
+ [:PARA_START, "", 9, 2], [:TEXT, "text", 11, 4], [:PARA_END, "", 15, 0], [false,false, 15, 0]],
138
+ lex("==heading==text"))
139
+ assert_equal(
140
+ [[:SECTION_START, "=", 0, 1], [:ITALIC_START, "''", 1, 2], [:TEXT, "italic", 3, 6], [:ITALIC_END, "''", 9, 2],
141
+ [:SECTION_END, "=", 11, 1], [false,false, 12, 0]],
142
+ lex("=''italic''="))
143
+ assert_equal(
144
+ [[:SECTION_START, "==", 0, 2], [:TEXT, "heading", 2, 7], [:SECTION_END, "", 9, 0], [:PARA_START, "", 9, 0],
145
+ [:TEXT, "\n\n", 9, 2], [:PARA_END, '', 11, 0], [false,false, 11, 0]],
146
+ lex("==heading\n\n"))
147
+ assert_equal(
148
+ [[:SECTION_START, "==", 0, 2], [:TEXT, "heading", 2, 7], [:SECTION_END, "", 9, 0], [:PARA_START, "", 9, 0],
149
+ [:TEXT, "\ntext", 9, 5], [:PARA_END, '', 14, 0], [false,false, 14, 0]],
150
+ lex("==heading\ntext"))
151
+ end
152
+
153
+ def test_inline_links
154
+ assert_equal(
155
+ [[:PARA_START, "", 0, 0], [:LINK_START, "", 0, 0], [:TEXT, "http://example.com", 0, 18], [:LINK_END, "", 18, 0],
156
+ [:PARA_END, "", 18, 0], [false, false, 18, 0]],
157
+ lex("http://example.com"))
158
+ assert_equal(
159
+ [[:PARA_START, "", 0, 0], [:LINK_START, "", 0, 0], [:TEXT, "https://example.com", 0, 19], [:LINK_END, "", 19, 0],
160
+ [:PARA_END, "", 19, 0], [false, false, 19, 0]],
161
+ lex("https://example.com"))
162
+ assert_equal(
163
+ [[:PARA_START, "", 0, 0], [:LINK_START, "", 0, 0], [:TEXT, "http://example.com", 0, 18], [:LINK_END, "", 18, 1],
164
+ [:PARA_END, "", 19, 0], [false, false, 19, 0]],
165
+ lex("http://example.com\n"))
166
+ assert_equal(
167
+ [[:PARA_START, "", 0, 0], [:LINK_START, "", 0, 0], [:TEXT, "http://example.com", 0, 18], [:LINK_END, "", 18, 0],
168
+ [:ITALIC_START, "''", 18, 2], [:TEXT, "italic", 20, 6], [:ITALIC_END, "''", 26, 2], [:PARA_END, "", 28, 0], [false, false, 28, 0]],
169
+ lex("http://example.com''italic''"))
170
+ assert_equal(
171
+ [[:PARA_START, "", 0, 0], [:TEXT, "http:notaurl", 0, 12], [:PARA_END, "", 12, 0], [false,false, 12, 0]],
172
+ lex("http:notaurl"))
173
+ assert_equal(
174
+ [[:SECTION_START, "=", 0, 1], [:TEXT, " ", 1, 1], [:LINK_START, "", 2, 0], [:TEXT, "http://example.com", 2, 18],
175
+ [:LINK_END, "", 20, 0], [:TEXT, " ", 20, 1], [:SECTION_END, "=", 21, 1], [false, false, 22, 0]],
176
+ lex("= http://example.com ="))
177
+ assert_equal(
178
+ [[:PARA_START, "", 0, 0], [:LINK_START, "", 0, 0], [:TEXT, "http://example.com/SpecialCharacters%C3%A7%C3%A3o", 0, 49], [:LINK_END, "", 49, 0],
179
+ [:PARA_END, "", 49, 0], [false, false, 49, 0]],
180
+ lex("http://example.com/SpecialCharacters%C3%A7%C3%A3o"))
181
+ end
182
+
183
+ def test_links
184
+ assert_equal(
185
+ [[:PARA_START, "", 0, 0], [:TEXT, "[]", 0, 2], [:PARA_END, "", 2, 0], [false, false, 2, 0]],
186
+ lex("[]"))
187
+ assert_equal(
188
+ [[:PARA_START, "", 0, 0], [:TEXT, "[ ]", 0, 3], [:PARA_END, "", 3, 0], [false, false, 3, 0]],
189
+ lex("[ ]"))
190
+ assert_equal(
191
+ [[:PARA_START, "", 0, 0], [:LINK_START, "[", 0, 1], [:TEXT, "http://example.com", 1, 18], [:LINK_END, "]", 19, 1],
192
+ [:PARA_END, "", 20, 0], [false, false, 20, 0]],
193
+ lex("[http://example.com]"))
194
+ assert_equal(
195
+ [[:PARA_START, "", 0, 0], [:LINK_START, "[", 0, 1], [:TEXT, "http://example.com", 1, 18], [:LINKSEP, " ", 19, 1],
196
+ [:LINK_END, "]", 20, 1], [:PARA_END, "", 21, 0], [false, false, 21, 0]],
197
+ lex("[http://example.com ]"))
198
+ assert_equal(
199
+ [[:PARA_START, "", 0, 0], [:LINK_START, "[", 0, 4], [:TEXT, "http://example.com", 4, 18], [:LINK_END, "]", 22, 1],
200
+ [:PARA_END, "", 23, 0], [false, false, 23, 0]],
201
+ lex("[   http://example.com]"))
202
+ assert_equal(
203
+ [[:PARA_START, "", 0, 0], [:LINK_START, "[", 0, 1], [:TEXT, "http://example.com", 1, 18], [:LINKSEP, " ", 19, 1],
204
+ [:TEXT, "example", 20, 7], [:LINK_END, "]", 27, 1], [:PARA_END, "", 28, 0], [false, false, 28, 0]],
205
+ lex("[http://example.com example]"))
206
+ assert_equal(
207
+ [[:PARA_START, "", 0, 0], [:LINK_START, "[", 0, 1], [:TEXT, "http://example.com", 1, 18], [:LINKSEP, " ", 19, 4],
208
+ [:TEXT, "example", 23, 7], [:LINK_END, "]", 30, 1], [:PARA_END, "", 31, 0], [false, false, 31, 0]],
209
+ lex("[http://example.com    example]"))
210
+ assert_equal(
211
+ [[:PARA_START, "", 0, 0], [:LINK_START, "[", 0, 1], [:TEXT, "http://example.com", 1, 18], [:LINKSEP, " ", 19, 1],
212
+ [:TEXT, "this is an example", 20, 18], [:LINK_END, "]", 38, 1], [:PARA_END, "", 39, 0], [false, false, 39, 0]],
213
+ lex("[http://example.com this is an example]"))
214
+ assert_equal(
215
+ [[:PARA_START, "", 0, 0], [:LINK_START, "[", 0, 1], [:TEXT, "http://example.com", 1, 18], [:LINKSEP, " ", 19, 1],
216
+ [:ITALIC_START, "''", 20, 2], [:TEXT, "italic", 22, 6], [:ITALIC_END, "''", 28, 2], [:LINK_END, "]", 30, 1],
217
+ [:PARA_END, "", 31, 0], [false, false, 31, 0]],
218
+ lex("[http://example.com ''italic'']"))
219
+ assert_equal(
220
+ [[:PARA_START, "", 0, 0], [:LINK_START, "[", 0, 1], [:TEXT, "http://example.com", 1, 18], [:LINKSEP, " ", 19, 1],
221
+ [:TEXT, "[example", 20, 8], [:LINK_END, "]", 28, 1], [:PARA_END, "", 29, 0], [false, false, 29, 0]],
222
+ lex("[http://example.com [example]"))
223
+ assert_equal(
224
+ [[:PARA_START, "", 0, 0], [:LINK_START, "[", 0, 1], [:TEXT, "http://example.com", 1, 18], [:LINK_END, "", 19, 0],
225
+ [:TEXT, "\ntext", 19, 5], [:PARA_END, "", 24, 0], [false, false, 24, 0]],
226
+ lex("[http://example.com\ntext"))
227
+ assert_equal(
228
+ [[:PARA_START, "", 0, 0], [:TEXT, "[text]", 0, 6], [:PARA_END, "", 6, 0], [false,false, 6, 0]],
229
+ lex("[text]"))
230
+ end
231
+
232
+ def test_internal_links
233
+ assert_equal(
234
+ [[:PARA_START, "", 0, 0], [:TEXT, "[[]]", 0, 4], [:PARA_END, "", 4, 0], [false, false, 4, 0]],
235
+ lex("[[]]"))
236
+ assert_equal(
237
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "example", 2, 7], [:INTLINK_END, "]]", 9, 2],
238
+ [:PARA_END, "", 11, 0], [false, false, 11, 0]],
239
+ lex("[[example]]"))
240
+ assert_equal(
241
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "example page", 2, 12], [:INTLINK_END, "]]", 14, 2],
242
+ [:PARA_END, "", 16, 0], [false, false, 16, 0]],
243
+ lex("[[example page]]"))
244
+ assert_equal(
245
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "example", 2, 7], [:INTLINKSEP, "|", 9, 1],
246
+ [:TEXT, "option", 10, 6], [:INTLINK_END, "]]", 16, 2], [:PARA_END, "", 18, 0], [false, false, 18, 0]],
247
+ lex("[[example|option]]"))
248
+ assert_equal(
249
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "example", 2, 7], [:INTLINKSEP, "|", 9, 1],
250
+ [:TEXT, "option1|option2", 10, 15], [:INTLINK_END, "]]", 25, 2], [:PARA_END, "", 27, 0], [false, false, 27, 0]],
251
+ lex("[[example|option1|option2]]"))
252
+ assert_equal(
253
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "resource", 2, 8], [:RESOURCESEP, ":", 10, 1],
254
+ [:TEXT, "example", 11, 7], [:INTLINKSEP, "|", 18, 1], [:TEXT, "option1", 19, 7], [:INTLINKSEP, "|", 26, 1],
255
+ [:TEXT, "option2", 27, 7], [:INTLINK_END, "]]", 34, 2], [:PARA_END, "", 36, 0], [false, false, 36, 0]],
256
+ lex("[[resource:example|option1|option2]]"))
257
+ assert_equal(
258
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "resource", 2, 8], [:RESOURCESEP, ":", 10, 1],
259
+ [:TEXT, "example", 11, 7], [:INTLINKSEP, "|", 18, 1], [:TEXT, "this:that", 19, 9], [:INTLINK_END, "]]", 28, 2],
260
+ [:PARA_END, "", 30, 0], [false, false, 30, 0]],
261
+ lex("[[resource:example|this:that]]"))
262
+ assert_equal(
263
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "resource", 2, 8], [:RESOURCESEP, ":", 10, 1],
264
+ [:TEXT, "example", 11, 7], [:INTLINKSEP, "|", 18, 1], [:INTLINK_START, "[[", 19, 2], [:TEXT, "link", 21, 4],
265
+ [:INTLINK_END, "]]", 25, 2], [:INTLINK_END, "]]", 27, 2], [:PARA_END, "", 29, 0], [false, false, 29, 0]],
266
+ lex("[[resource:example|[[link]]]]"))
267
+ assert_equal(
268
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "resource", 2, 8], [:RESOURCESEP, ":", 10, 1],
269
+ [:TEXT, "example", 11, 7], [:INTLINKSEP, "|", 18, 1], [:INTLINKSEP, "|", 19, 1], [:TEXT, "option", 20, 6],
270
+ [:INTLINK_END, "]]", 26, 2], [:PARA_END, "", 28, 0], [false, false, 28, 0]],
271
+ lex("[[resource:example||option]]"))
272
+ assert_equal(
273
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "example", 2, 7], [:INTLINKSEP, "|", 9, 1],
274
+ [:TEXT, "option", 10, 6], [:ITALIC_START, "''", 16, 2], [:TEXT, "italic", 18, 6], [:ITALIC_END, "''", 24, 2],
275
+ [:TEXT, "option", 26, 6], [:INTLINK_END, "]]", 32, 2], [:PARA_END, "", 34, 0], [false, false, 34, 0]],
276
+ lex("[[example|option''italic''option]]"))
277
+ assert_equal(
278
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "example", 2, 7], [:INTLINKSEP, "|", 9, 1],
279
+ [:TEXT, "option[http://example.com]option", 10, 32], [:INTLINK_END, "]]", 42, 2], [:PARA_END, "", 44, 0], [false, false, 44, 0]],
280
+ lex("[[example|option[http://example.com]option]]"))
281
+ assert_equal(
282
+ [[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "example", 2, 7], [:INTLINKSEP, "|", 9, 1],
283
+ [:TEXT, "option", 10, 6], [:INTLINK_END, "", 16, 0], [:TEXT, "\n\n", 16, 2], [:PARA_END, "", 18, 0], [false, false, 18, 0]],
284
+ lex("[[example|option\n\n"))
285
+ end
286
+
287
+ def test_table
288
+ assert_equal([[:TABLE_START, "{|", 0, 3], [:TABLE_END, "|}", 3, 2], [false, false, 5, 0]],
289
+ lex("{|\n|}"))
290
+ assert_equal([[:TABLE_START, "{|", 0, 2], [:TEXT, " width='100%'\n", 2, 14], [:TABLE_END, "|}", 16, 2], [false, false, 18, 0]],
291
+ lex("{| width='100%'\n|}"))
292
+ assert_equal([[:TABLE_START, "{|", 0, 3], [:ROW_START, "", 3, 0], [:CELL_START, "|", 3, 1], [:TEXT, "a\n", 4, 2],
293
+ [:CELL_END, "", 6, 0], [:CELL_START, "|", 6, 1], [:TEXT, "b\n", 7, 2], [:CELL_END, "", 9, 0], [:ROW_END, "", 9, 0],
294
+ [:TABLE_END, "|}", 9, 2], [false, false, 11, 0]],
295
+ lex("{|\n|a\n|b\n|}"))
296
+ assert_equal([[:TABLE_START, "{|", 0, 3], [:ROW_START, "", 3, 0], [:CELL_START, "|", 3, 1], [:TEXT, "a", 4, 1],
297
+ [:CELL_END, "", 5, 0], [:CELL_START, "||", 5, 2], [:TEXT, "b\n", 7, 2], [:CELL_END, "", 9, 0], [:ROW_END, "", 9, 0],
298
+ [:TABLE_END, "|}", 9, 2], [false, false, 11, 0]],
299
+ lex("{|\n|a||b\n|}"))
300
+ assert_equal([[:TABLE_START, "{|", 0, 3], [:ROW_START, "", 3, 0], [:CELL_START, "|", 3, 1], [:TEXT, "a\n", 4, 2],
301
+ [:CELL_END, "", 6, 0], [:ROW_END, "", 6, 0], [:ROW_START, "|-", 6, 3], [:CELL_START, "|", 9, 1], [:TEXT, "b\n", 10, 2],
302
+ [:CELL_END, "", 12, 0], [:ROW_END, "", 12, 0], [:TABLE_END, "|}", 12, 2], [false, false, 14, 0]],
303
+ lex("{|\n|a\n|-\n|b\n|}"))
304
+ assert_equal([[:TABLE_START, "{|", 0, 3], [:ROW_START, "", 3, 0], [:CELL_START, "|", 3, 1], [:TEXT, "a\n", 4, 2],
305
+ [:CELL_END, "", 6, 0], [:ROW_END, "", 6, 0], [:ROW_START, "|-", 6, 2], [:TEXT, " align='left'\n", 8, 14],
306
+ [:CELL_START, "|", 22, 1], [:TEXT, "b\n", 23, 2], [:CELL_END, "", 25, 0], [:ROW_END, "", 25, 0], [:TABLE_END, "|}", 25, 2],
307
+ [false, false, 27, 0]],
308
+ lex("{|\n|a\n|- align='left'\n|b\n|}"))
309
+ assert_equal([[:TABLE_START, "{|", 0, 3], [:ROW_START, "", 3, 0], [:CELL_START, "|", 3, 1], [:TEXT, "a\n", 4, 2],
310
+ [:CELL_END, "", 6, 0], [:ROW_END, "", 6, 0], [:ROW_START, "|-", 6, 3], [:CELL_START, "|", 9, 1],
311
+ [:TEXT, ' colspan="4" align="center" style="background:#ffdead;"', 10, 55], [:CELL_END, "attributes", 65, 0],
312
+ [:CELL_START, "|", 65, 1], [:TEXT, " b\n", 66, 3], [:CELL_END, "", 69, 0], [:ROW_END, "", 69, 0], [:TABLE_END, "|}", 69, 2],
313
+ [false, false, 71, 0]],
314
+ lex("{|\n|a\n|-\n| colspan=\"4\" align=\"center\" style=\"background:#ffdead;\"| b\n|}"))
315
+ end
80
316
 
81
- def test_bullets
82
- assert_equal(lex("* Foo"),
83
- [[:UL_START, ""], [:LI_START, ""], [:TEXT, "Foo"], [:LI_END, ""], [:UL_END, ""], [false, false]])
84
- end
317
+ def test_preformatted
318
+ assert_equal([[:PARA_START, '', 0, 0], [:TEXT, "  ", 0, 2], [:PARA_END, '', 2, 0], [false, false, 2, 0]],
319
+ lex("  "))
320
+ assert_equal([[:PARA_START, '', 0, 0], [:TEXT, "  \n", 0, 3], [:PARA_END, '', 3, 0], [false, false, 3, 0]],
321
+ lex("  \n"))
322
+ assert_equal([[:PARA_START, '', 0, 0], [:TEXT, "         \n", 0, 10], [:PARA_END, '', 10, 0], [false, false, 10, 0]],
323
+ lex("         \n"))
324
+ assert_equal([[:PREINDENT_START, '', 0, 0], [:TEXT, " text\n", 0, 6], [:PREINDENT_END, '', 6, 0], [false, false, 6, 0]],
325
+ lex(" text\n"))
326
+ assert_equal([[:PREINDENT_START, '', 0, 0], [:TEXT, " text\r\n", 0, 7], [:PREINDENT_END, '', 7, 0], [false, false, 7, 0]],
327
+ lex(" text\r\n"))
328
+ assert_equal([[:PREINDENT_START, '', 0, 0], [:TEXT, " text\n text\n", 0, 12], [:PREINDENT_END, '', 12, 0], [false, false, 12, 0]],
329
+ lex(" text\n text\n"))
330
+ assert_equal([[:PARA_START, '', 0, 0], [:TEXT, "text\n", 0, 5], [:PARA_END, '', 5, 0], [:PREINDENT_START, '', 5, 0],
331
+ [:TEXT, " text\n", 5, 6], [:PREINDENT_END, '', 11, 0], [false, false, 11, 0]],
332
+ lex("text\n text\n"))
333
+ assert_equal([[:PREINDENT_START, '', 0, 0], [:TEXT, " text\n", 0, 6], [:PREINDENT_END, '', 6, 0], [:PARA_START, '', 6, 0],
334
+ [:TEXT, "text\n", 6, 5], [:PARA_END, '', 11, 0], [false, false, 11, 0]],
335
+ lex(" text\ntext\n"))
336
+ assert_equal([[:PREINDENT_START, '', 0, 0], [:TEXT, ' ', 0, 1], [:ITALIC_START, "''", 1, 2], [:TEXT, "italic", 3, 6],
337
+ [:ITALIC_END, "''", 9, 2], [:TEXT, "\n", 11, 1], [:PREINDENT_END, '', 12, 0], [false, false, 12, 0]],
338
+ lex(" ''italic''\n"))
339
+ end
340
+
341
+ def test_hline
342
+ assert_equal([[:HLINE, "----", 0, 4], [false, false, 4, 0]], lex("----"))
343
+ assert_equal([[:HLINE, "----", 1, 4], [false, false, 5, 0]], lex("\n----"))
344
+ assert_equal([[:HLINE, "----", 2, 4], [false, false, 6, 0]], lex("\r\n----"))
345
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "text\n", 0, 5], [:PARA_END, "", 5, 0], [:HLINE, "----", 5, 4], [false, false, 9, 0]],
346
+ lex("text\n----"))
347
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "text\r\n", 0, 6], [:PARA_END, "", 6, 0], [:HLINE, "----", 6, 4], [false, false, 10, 0]],
348
+ lex("text\r\n----"))
349
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "text\n\n", 0, 6], [:PARA_END, "", 6, 0], [:HLINE, "----", 6, 4], [false, false, 10, 0]],
350
+ lex("text\n\n----"))
351
+ assert_equal([[:HLINE, "----", 0, 4], [:PARA_START, "", 4, 0], [:TEXT, "\ntext", 4, 5], [:PARA_END, "", 9, 0], [false, false, 9, 0]],
352
+ lex("----\ntext"))
353
+ assert_equal([[:HLINE, "----", 0, 4], [:PARA_START, "", 4, 0], [:TEXT, "\r\ntext", 4, 6], [:PARA_END, "", 10, 0], [false, false, 10, 0]],
354
+ lex("----\r\ntext"))
355
+ assert_equal([[:HLINE, "----", 0, 4], [:PARA_START, "", 4, 0], [:TEXT, "\n\n", 4, 2], [:PARA_END, "", 6, 0], [:PARA_START, "", 6, 0],
356
+ [:TEXT, "text", 6, 4], [:PARA_END, "", 10, 0], [false, false, 10, 0]],
357
+ lex("----\n\ntext"))
358
+ end
359
+
360
+ def test_nowiki
361
+ assert_equal([[:PARA_START, "", 0, 8], [:TEXT, "''italic''", 8, 10], [:PARA_END, "", 27, 0], [false, false, 27, 0]],
362
+ lex("<nowiki>''italic''</nowiki>"))
363
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "text''italic''text", 0, 18], [:PARA_END, "", 35, 0], [false, false, 35, 0]],
364
+ lex("text<nowiki>''italic''</nowiki>text"))
365
+ assert_equal([[:PARA_START, "", 0, 8], [:TEXT, "<u>uuu</u>", 8, 10], [:PARA_END, "", 27, 0], [false, false, 27, 0]],
366
+ lex("<nowiki><u>uuu</u></nowiki>"))
367
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "texttext", 0, 8], [:PARA_END, "", 17, 0], [false, false, 17, 0]],
368
+ lex("text<nowiki/>text"))
369
+ end
370
+
371
+ def test_math
372
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "math", 0, 6], [:TEXT, "1 == 1 == 1", 6, 11], [:TAG_END, "math", 17, 7],
373
+ [:PARA_END, "", 24, 0], [false, false, 24, 0]],
374
+ lex("<math>1 == 1 == 1</math>"))
375
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "math", 0, 6], [:TEXT, "1 == 1", 6, 6], [:TAG_END, "math", 12, 7],
376
+ [:TEXT, "xxx", 19, 3], [:TAG_START, "math", 22, 6], [:TEXT, "1 == 1", 28, 6], [:TAG_END, "math", 34, 7],
377
+ [:PARA_END, "", 41, 0], [false, false, 41, 0]],
378
+ lex("<math>1 == 1</math>xxx<math>1 == 1</math>"))
379
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "math", 0, 0], [:TAG_END, "math", 0, 7], [:PARA_END, "", 7, 0], [false, false, 7, 0]],
380
+ lex("<math/>"))
381
+ end
382
+
383
+ def test_pre
384
+ assert_equal([[:TAG_START, "pre", 0, 0], [:ATTR_NAME, "name", 0, 0], [:ATTR_VALUE, "code", 0, 17],
385
+ [:TEXT, "1 == 1 == 1", 17, 11], [:TAG_END, "pre", 28, 6], [false, false, 34, 0]],
386
+ lex("<pre name='code'>1 == 1 == 1</pre>"))
387
+ assert_equal([[:TAG_START, "pre", 0, 0], [:TAG_END, "pre", 0, 6], [false, false, 6, 0]],
388
+ lex("<pre/>"))
389
+ assert_equal([[:TAG_START, "pre", 0, 5], [:TEXT, "1 == 1", 5, 6], [:TAG_END, "pre", 11, 0], [:PARA_START, "", 11, 7], [:TEXT, "xxx\n", 18, 4],
390
+ [:PARA_END, "", 22, 0], [:TAG_START, "pre", 22, 5], [:TEXT, "1 == 1", 27, 6], [:TAG_END, "pre", 33, 6], [false, false, 39, 0]],
391
+ lex("<pre>1 == 1</pre>\nxxx\n<pre>1 == 1</pre>"))
392
+ end
393
+
394
+ def test_template
395
+ assert_equal([[:PARA_START, "", 0, 0], [:TEMPLATE_START, "{{", 0, 2], [:TEXT, "ref", 2, 3], [:TEMPLATE_END, "}}", 5, 2],
396
+ [:PARA_END, "", 7, 0], [false, false, 7, 0]],
397
+ lex("{{ref}}"))
398
+ assert_equal([[:PARA_START, "", 0, 0], [:TEMPLATE_START, "{{", 0, 2], [:TEXT, "ref1}ref2", 2, 9], [:TEMPLATE_END, "}}", 11, 2],
399
+ [:PARA_END, "", 13, 0], [false, false, 13, 0]],
400
+ lex("{{ref1}ref2}}"))
401
+ assert_equal([[:PARA_START, "", 0, 0], [:TEMPLATE_START, "{{", 0, 2], [:TEXT, "ref1\n{", 2, 6],
402
+ [:INTLINKSEP, "|", 8, 2], [:INTLINKSEP, "|", 10, 1], [:TEXT, "not a table!\n", 11, 13], [:INTLINKSEP, "|", 24, 1], [:TEXT, "} ", 25, 2],
403
+ [:TEMPLATE_END, "}}", 27, 2], [:PARA_END, "", 29, 0], [false, false, 29, 0]],
404
+ lex("{{ref1\n{|\n|not a table!\n|} }}"))
405
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "{{}}", 0, 4], [:PARA_END, "", 4, 0], [false, false, 4, 0]],
406
+ lex("{{}}"))
407
+ # nested templates are not yet supported
408
+ # assert_equal([[:PARA_START, ""], [:TEMPLATE_START, "{{"], [:TEXT, "xxx"], [:TEMPLATE_START, "{{"],
409
+ # [:TEXT, "iii"], [:TEMPLATE_END, "}}"], [:TEXT, "xxx"], [:TEMPLATE_END, "}}"],
410
+ # [:PARA_END, ""], [false, false, 0, 0]],
411
+ # lex("{{xxx{{iii}}xxx}}"))
412
+ end
413
+
414
+ def test_xhtml_markup
415
+ # not closed angles
416
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "<", 0, 1], [:PARA_END, "", 1, 0], [false, false, 1, 0]], lex("<"))
417
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, ">", 0, 1], [:PARA_END, "", 1, 0], [false, false, 1, 0]], lex(">"))
418
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "< ", 0, 2], [:PARA_END, "", 2, 0], [false, false, 2, 0]], lex("< "))
419
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "> ", 0, 2], [:PARA_END, "", 2, 0], [false, false, 2, 0]], lex("> "))
420
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "text <", 0, 6], [:PARA_END, "", 6, 0], [false, false, 6, 0]],
421
+ lex("text <"))
422
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "text >", 0, 6], [:PARA_END, "", 6, 0], [false, false, 6, 0]],
423
+ lex("text >"))
424
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "text < ", 0, 7], [:PARA_END, "", 7, 0], [false, false, 7, 0]],
425
+ lex("text < "))
426
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "text > ", 0, 7], [:PARA_END, "", 7, 0], [false, false, 7, 0]],
427
+ lex("text > "))
428
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "< text", 0, 6], [:PARA_END, "", 6, 0], [false, false, 6, 0]],
429
+ lex("< text"))
430
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "> text ", 0, 7],[:PARA_END, "", 7, 0], [false, false, 7, 0]],
431
+ lex("> text "))
85
432
 
86
- def test_nested_bullets
87
- assert_equal(lex("**Foo"), [[:UL_START, ""], [:LI_START, ""],
88
- [:UL_START, ""], [:LI_START, ""], [:TEXT, "Foo"], [:LI_END, ""],
89
- [:UL_END, ""], [:LI_END, ""], [:UL_END, ""], [false, false]])
90
- end
433
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 4], [:TEXT, "text", 4, 4], [:TAG_END, "tt", 8, 5],
434
+ [:PARA_END, "", 13, 0], [false, false, 13, 0]],
435
+ lex("<tt>text</tt>"))
436
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 0], [:TAG_END, "tt", 0, 5], [:PARA_END, "", 5, 0], [false, false, 5, 0]],
437
+ lex("<tt/>"))
438
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 0], [:TAG_END, "tt", 0, 6], [:PARA_END, "", 6, 0], [false, false, 6, 0]],
439
+ lex("<tt />"))
440
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "<123>", 0, 5], [:PARA_END, "", 5, 0], [false, false, 5, 0]],
441
+ lex("<123>"))
442
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "<xx xx>", 0, 7], [:PARA_END, "", 7, 0], [false, false, 7, 0]],
443
+ lex("<xx xx>"))
444
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "</xxx ", 0, 6], [:PARA_END, "", 6, 0], [false, false, 6, 0]],
445
+ lex("</xxx "))
446
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "<xx </xx>", 0, 9], [:PARA_END, "", 9, 0], [false, false, 9, 0]],
447
+ lex("<xx </xx>"))
448
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "<xx a='b' c>", 0, 12], [:PARA_END, "", 12, 0], [false, false, 12, 0]],
449
+ lex("<xx a='b' c>"))
450
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "<>", 0, 2], [:PARA_END, "", 2, 0], [false, false, 2, 0]],
451
+ lex("<>"))
452
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 0], [:ATTR_NAME, 'class', 0, 0], [:ATTR_VALUE, 'tt', 0, 15],
453
+ [:TEXT, "text", 15, 4], [:TAG_END, "tt", 19, 5], [:PARA_END, "", 24, 0], [false, false, 24, 0]],
454
+ lex("<tt class='tt'>text</tt>"))
455
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 0], [:ATTR_NAME, 'class', 0, 0], [:ATTR_VALUE, 'tt', 0, 20],
456
+ [:TEXT, "text", 20, 4], [:TAG_END, "tt", 24, 6], [:PARA_END, "", 30, 0], [false, false, 30, 0]],
457
+ lex("<tt class = 'tt' >text</tt >"))
458
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 0], [:ATTR_NAME, 'class', 0, 0], [:ATTR_VALUE, 'tt', 0, 18],
459
+ [:TEXT, "text", 18, 4], [:TAG_END, "tt", 22, 6], [:PARA_END, "", 28, 0], [false, false, 28, 0]],
460
+ lex("<tt\nclass\n=\n'tt'\n>text</tt\n>"))
461
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 0], [:ATTR_NAME, 'class', 0, 0], [:ATTR_VALUE, 'tt', 0, 0],
462
+ [:TAG_END, "tt", 0, 17], [:PARA_END, "", 17, 0], [false, false, 17, 0]],
463
+ lex("<tt class='tt' />"))
464
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 4], [:TEXT, "\ntext\n", 4, 6], [:TAG_END, "tt", 10, 5],
465
+ [:PARA_END, "", 15, 0], [false, false, 15, 0]],
466
+ lex("<tt>\ntext\n</tt>"))
467
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 4], [:TEXT, "\n\ntext\n", 4, 7], [:TAG_END, "tt", 11, 5],
468
+ [:PARA_END, "", 16, 0], [false, false, 16, 0]],
469
+ lex("<tt>\n\ntext\n</tt>"))
470
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 12], [:TEXT, "<tt/>", 12, 5], [:TAG_END, "tt", 26, 5],
471
+ [:PARA_END, "", 31, 0], [false, false, 31, 0]],
472
+ lex("<tt><nowiki><tt/></nowiki></tt>"))
473
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "tt", 0, 4], [:PASTE_START, "", 4, 7], [:TEXT, "paste", 11, 5],
474
+ [:PASTE_END, "", 16, 8], [:TAG_END, "tt", 24, 5], [:PARA_END, "", 29, 0], [false, false, 29, 0]],
475
+ lex("<tt><paste>paste</paste></tt>"))
476
+ assert_equal([[:PARA_START, "", 0, 0], [:LINK_START, "[", 0, 1], [:TEXT, "http://example.com", 1, 18], [:LINKSEP, " ", 19, 1],
477
+ [:TAG_START, "tt", 20, 4], [:TEXT, "text", 24, 4], [:TAG_END, "tt", 28, 5], [:LINK_END, "]", 33, 1], [:PARA_END, "", 34, 0], [false, false, 34, 0]],
478
+ lex("[http://example.com <tt>text</tt>]"))
479
+ assert_equal([[:PARA_START, "", 0, 0], [:INTLINK_START, "[[", 0, 2], [:TEXT, "page", 2, 4], [:INTLINKSEP, "|", 6, 1],
480
+ [:TAG_START, "tt", 7, 4], [:TEXT, "text", 11, 4], [:TAG_END, "tt", 15, 5], [:INTLINK_END, "]]", 20, 2], [:PARA_END, "", 22, 0], [false, false, 22, 0]],
481
+ lex("[[page|<tt>text</tt>]]"))
482
+ end
483
+
484
+ def test_xhtml_char_entities
485
+ assert_equal([[:PARA_START, "", 0, 0], [:CHAR_ENT, "lt", 0, 4], [:PARA_END, "", 4, 0], [false, false, 4, 0]],
486
+ lex("&lt;"))
487
+ assert_equal([[:PARA_START, "", 0, 0], [:CHAR_ENT, "amp", 0, 5], [:TEXT, "amp;", 5, 4], [:PARA_END, "", 9, 0], [false, false, 9, 0]],
488
+ lex("&amp;amp;"))
489
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "&", 0, 1], [:PARA_END, "", 1, 0], [false, false, 1, 0]],
490
+ lex("&"))
491
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "&amp", 0, 4], [:PARA_END, "", 4, 0], [false, false, 4, 0]],
492
+ lex("&amp"))
493
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "&amp ;", 0, 6], [:PARA_END, "", 6, 0], [false, false, 6, 0]],
494
+ lex("&amp ;"))
495
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "&amp amp;", 0, 9], [:PARA_END, "", 9, 0], [false, false, 9, 0]],
496
+ lex("&amp amp;"))
497
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "&amp", 0, 4], [:CHAR_ENT, "amp", 4, 5], [:PARA_END, "", 9, 0], [false, false, 9, 0]],
498
+ lex("&amp&amp;"))
499
+ end
500
+
501
+ def test_unordered_lists
502
+ assert_equal([[:UL_START, '', 0, 0], [:LI_START, '', 0, 1], [:TEXT, "a", 1, 1], [:LI_END, '', 2, 0], [:UL_END, '', 2, 0],
503
+ [false, false, 2, 0]],
504
+ lex("*a"))
505
+ assert_equal([[:UL_START, '', 0, 0], [:LI_START, '', 0, 1], [:TEXT, "a\n", 1, 2], [:LI_END, '', 3, 0], [:UL_END, '', 3, 0],
506
+ [false, false, 3, 0]],
507
+ lex("*a\n"))
508
+ assert_equal([[:UL_START, '', 1, 0], [:LI_START, '', 1, 1], [:TEXT, "a", 2, 1], [:LI_END, '', 3, 0], [:UL_END, '', 3, 0],
509
+ [false, false, 3, 0]],
510
+ lex("\n*a"))
511
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "text\n", 0, 5], [:PARA_END, "", 5, 0], [:UL_START, '', 5, 0],
512
+ [:LI_START, '', 5, 1], [:TEXT, "a", 6, 1], [:LI_END, '', 7, 0], [:UL_END, '', 7, 0], [false, false, 7, 0]],
513
+ lex("text\n*a"))
514
+ assert_equal([[:UL_START, '', 0, 0], [:LI_START, '', 0, 1], [:TEXT, "a\n", 1, 2], [:LI_END, '', 3, 0],
515
+ [:LI_START, '', 3, 1], [:TEXT, "b\n", 4, 2], [:LI_END, '', 6, 0], [:UL_END, '', 6, 0], [false, false, 6, 0]],
516
+ lex("*a\n*b\n"))
517
+ assert_equal([[:UL_START, '', 0, 0], [:LI_START, '', 0, 1], [:TEXT, "a\n", 1, 2],
518
+ [:UL_START, '', 3, 0], [:LI_START, '', 3, 2], [:TEXT, "i\n", 5, 2], [:LI_END, '', 7, 0], [:UL_END, '', 7, 0], [:LI_END, '', 7, 0],
519
+ [:LI_START, '', 7, 1], [:TEXT, "b\n", 8, 2], [:LI_END, '', 10, 0], [:UL_END, '', 10, 0], [false, false, 10, 0]],
520
+ lex("*a\n**i\n*b\n"))
521
+ assert_equal([[:UL_START, '', 0, 0], [:LI_START, '', 0, 1], [:TEXT, "a\n", 1, 2],
522
+ [:UL_START, '', 3, 0], [:LI_START, '', 3, 2], [:TEXT, "i\n", 5, 2], [:LI_END, '', 7, 0], [:UL_END, '', 7, 0], [:LI_END, '', 7, 0],
523
+ [:UL_END, '', 7, 0], [false, false, 7, 0]],
524
+ lex("*a\n**i\n"))
525
+ assert_equal([[:UL_START, '', 0, 0], [:LI_START, '', 0, 1], [:UL_START, '', 1, 0],
526
+ [:LI_START, '', 1, 1], [:TEXT, "i\n", 2, 2], [:LI_END, '', 4, 0], [:UL_END, '', 4, 0], [:LI_END, '', 4, 0],
527
+ [:UL_END, '', 4, 0], [false, false, 4, 0]],
528
+ lex("**i\n"))
529
+ assert_equal([[:UL_START, '', 0, 0], [:LI_START, '', 0, 1], [:UL_START, '', 1, 0],
530
+ [:LI_START, '', 1, 1], [:TEXT, "i\n", 2, 2], [:LI_END, '', 4, 0], [:UL_END, '', 4, 0], [:LI_END, '', 4, 0],
531
+ [:LI_START, '', 4, 1], [:TEXT, "b\n", 5, 2], [:LI_END, '', 7, 0], [:UL_END, '', 7, 0], [false, false, 7, 0]],
532
+ lex("**i\n*b\n"))
533
+ end
534
+
535
+ def test_ordered_lists
536
+ assert_equal([[:OL_START, "", 0, 0], [:LI_START, "", 0, 1], [:TEXT, "a", 1, 1], [:LI_END, "", 2, 0], [:OL_END, "", 2, 0],
537
+ [false, false, 2, 0]],
538
+ lex("#a"))
539
+ assert_equal([[:OL_START, "", 0, 0], [:LI_START, "", 0, 1], [:TEXT, "a\n", 1, 2], [:LI_END, "", 3, 0], [:OL_END, "", 3, 0],
540
+ [false, false, 3, 0]],
541
+ lex("#a\n"))
542
+ assert_equal([[:OL_START, '', 1, 0], [:LI_START, '', 1, 1], [:TEXT, "a", 2, 1], [:LI_END, '', 3, 0], [:OL_END, '', 3, 0],
543
+ [false, false, 3, 0]],
544
+ lex("\n#a"))
545
+ assert_equal([[:PARA_START, "", 0, 0], [:TEXT, "text\n", 0, 5], [:PARA_END, "", 5, 0], [:OL_START, '', 5, 0],
546
+ [:LI_START, '', 5, 1], [:TEXT, "a", 6, 1], [:LI_END, '', 7, 0], [:OL_END, '', 7, 0], [false, false, 7, 0]],
547
+ lex("text\n#a"))
548
+ assert_equal([[:OL_START, '', 0, 0], [:LI_START, '', 0, 1], [:TEXT, "a\n", 1, 2], [:LI_END, '', 3, 0],
549
+ [:LI_START, '', 3, 1], [:TEXT, "b\n", 4, 2], [:LI_END, '', 6, 0], [:OL_END, '', 6, 0], [false, false, 6, 0]],
550
+ lex("#a\n#b\n"))
551
+ assert_equal([[:OL_START, "", 0, 0], [:LI_START, "", 0, 1], [:TEXT, "a\n", 1, 2], [:OL_START, "", 3, 0], [:LI_START, "", 3, 2],
552
+ [:TEXT, "i\n", 5, 2], [:LI_END, "", 7, 0], [:OL_END, "", 7, 0], [:LI_END, "", 7, 0], [:LI_START, "", 7, 1], [:TEXT, "b\n", 8, 2],
553
+ [:LI_END, "", 10, 0], [:OL_END, "", 10, 0], [false, false, 10, 0]],
554
+ lex("#a\n##i\n#b\n"))
555
+ assert_equal([[:OL_START, "", 0, 0], [:LI_START, "", 0, 1], [:TEXT, "a\n", 1, 2], [:OL_START, "", 3, 0], [:LI_START, "", 3, 2],
556
+ [:TEXT, "i\n", 5, 2], [:LI_END, "", 7, 0], [:OL_END, "", 7, 0], [:LI_END, "", 7, 0], [:OL_END, "", 7, 0],
557
+ [false, false, 7, 0]],
558
+ lex("#a\n##i\n"))
559
+ assert_equal([[:OL_START, "", 0, 0], [:LI_START, "", 0, 1], [:OL_START, "", 1, 0], [:LI_START, "", 1, 1], [:TEXT, "i\n", 2, 2],
560
+ [:LI_END, "", 4, 0], [:OL_END, "", 4, 0], [:LI_END, "", 4, 0], [:OL_END, "", 4, 0], [false, false, 4, 0]],
561
+ lex("##i\n"))
562
+ assert_equal([[:OL_START, "", 0, 0], [:LI_START, "", 0, 1], [:OL_START, "", 1, 0], [:LI_START, "", 1, 1], [:TEXT, "i\n", 2, 2],
563
+ [:LI_END, "", 4, 0], [:OL_END, "", 4, 0], [:LI_END, "", 4, 0], [:LI_START, "", 4, 1], [:TEXT, "b\n", 5, 2], [:LI_END, "", 7, 0],
564
+ [:OL_END, "", 7, 0], [false, false, 7, 0]],
565
+ lex("##i\n#b\n"))
566
+ end
567
+
568
+ def test_mixed_lists
569
+ assert_equal([[:UL_START, '', 0, 0], [:LI_START, '', 0, 1], [:TEXT, "a\n", 1, 2], [:LI_END, '', 3, 0], [:UL_END, '', 3, 0],
570
+ [:OL_START, '', 3, 0], [:LI_START, '', 3, 1], [:TEXT, "b\n", 4, 2], [:LI_END, '', 6, 0], [:OL_END, '', 6, 0], [false, false, 6, 0]],
571
+ lex("*a\n#b\n"))
572
+ assert_equal([[:OL_START, '', 0, 0], [:LI_START, '', 0, 1], [:TEXT, "a\n", 1, 2],
573
+ [:UL_START, '', 3, 0], [:LI_START, '', 3, 2], [:TEXT, "i\n", 5, 2], [:LI_END, '', 7, 0], [:UL_END, '', 7, 0], [:LI_END, '', 7, 0],
574
+ [:LI_START, '', 7, 1], [:TEXT, "b\n", 8, 2], [:LI_END, '', 10, 0], [:OL_END, '', 10, 0], [false, false, 10, 0]],
575
+ lex("#a\n#*i\n#b\n"))
576
+ assert_equal([[:UL_START, '', 0, 0], [:LI_START, '', 0, 1], [:TEXT, "a\n", 1, 2],
577
+ [:OL_START, '', 3, 0], [:LI_START, '', 3, 2], [:TEXT, "i\n", 5, 2], [:LI_END, '', 7, 0], [:OL_END, '', 7, 0], [:LI_END, '', 7, 0],
578
+ [:LI_START, '', 7, 1], [:TEXT, "b\n", 8, 2], [:LI_END, '', 10, 0], [:UL_END, '', 10, 0], [false, false, 10, 0]],
579
+ lex("*a\n*#i\n*b\n"))
580
+ end
581
+
582
+ def test_definition_lists
583
+ assert_equal([[:DL_START, "", 0, 0], [:DT_START, ";", 0, 1], [:TEXT, "a", 1, 1], [:DT_END, "", 2, 0], [:DL_END, "", 2, 0],
584
+ [false, false, 2, 0]],
585
+ lex(";a"))
586
+ assert_equal([[:DL_START, '', 0, 0], [:DT_START, ';', 0, 1], [:TEXT, "a\n", 1, 2], [:DT_END, '', 3, 0], [:DL_END, '', 3, 0],
587
+ [false, false, 3, 0]],
588
+ lex(";a\n"))
589
+ assert_equal([[:DL_START, "", 0, 0], [:DD_START, ":", 0, 1], [:TEXT, "b", 1, 1], [:DD_END, "", 2, 0], [:DL_END, "", 2, 0],
590
+ [false, false, 2, 0]],
591
+ lex(":b"))
592
+ assert_equal([[:DL_START, '', 0, 0], [:DD_START, ':', 0, 1], [:TEXT, "b\n", 1, 2], [:DD_END, '', 3, 0], [:DL_END, '', 3, 0],
593
+ [false, false, 3, 0]],
594
+ lex(":b\n"))
595
+ assert_equal([[:DL_START, '', 0, 0], [:DT_START, ';', 0, 1], [:TEXT, "a\n", 1, 2], [:DT_END, '', 3, 0],
596
+ [:DD_START, ':', 3, 1], [:TEXT, "b\n", 4, 2], [:DD_END, '', 6, 0], [:DL_END, '', 6, 0], [false, false, 6, 0]],
597
+ lex(";a\n:b\n"))
598
+ assert_equal([[:DL_START, '', 0, 0], [:DT_START, ';', 0, 1], [:TEXT, "a", 1, 1], [:DT_END, '', 2, 0],
599
+ [:DD_START, ':', 2, 1], [:TEXT, "b\n", 3, 2], [:DD_END, '', 5, 0], [:DL_END, '', 5, 0], [false, false, 5, 0]],
600
+ lex(";a:b\n"))
601
+ assert_equal([[:DL_START, '', 0, 0], [:DT_START, ';', 0, 1], [:TEXT, "a\n", 1, 2], [:DT_END, '', 3, 0],
602
+ [:DD_START, ':', 3, 1], [:TEXT, "b\n", 4, 2], [:DD_END, '', 6, 0], [:DD_START, ':', 6, 1], [:TEXT, "c\n", 7, 2],
603
+ [:DD_END, '', 9, 0],[:DL_END, '', 9, 0], [false, false, 9, 0]],
604
+ lex(";a\n:b\n:c\n"))
605
+ assert_equal([[:DL_START, '', 0, 0], [:DT_START, ';', 0, 1], [:TEXT, "a\n", 1, 2], [:DT_END, '', 3, 0], [:DL_END, '', 3, 0],
606
+ [:DL_START, '', 3, 0], [:DT_START, ';', 3, 1], [:TEXT, "a\n", 4, 2], [:DT_END, '', 6, 0], [:DL_END, '', 6, 0],
607
+ [false, false, 6, 0]],
608
+ lex(";a\n;a\n"))
609
+ assert_equal([[:DL_START, '', 0, 0], [:DT_START, ';', 0, 1], [:TEXT, "a\n", 1, 2], [:DT_END, '', 3, 0], [:DL_END, '', 3, 0],
610
+ [:PARA_START, '', 3, 0], [:TEXT, 'text', 3, 4], [:PARA_END, '', 7, 0], [false, false, 7, 0]],
611
+ lex(";a\ntext"))
612
+ assert_equal([[:DL_START, '', 0, 0], [:DT_START, ';', 0, 1], [:TEXT, "a", 1, 1], [:DT_END, '', 2, 0],
613
+ [:DD_START, ':', 2, 1], [:INTLINK_START, '[[', 3, 2], [:TEXT, "resource", 5, 8], [:RESOURCESEP, ':', 13, 1],
614
+ [:TEXT, 'text', 14, 4], [:INTLINK_END, "]]", 18, 3], [:DD_END, '', 21, 0], [:DL_END, '', 21, 0], [false, false, 21, 0]],
615
+ lex(";a:[[resource:text]]\n"))
616
+ end
91
617
 
92
- def test_bullets_at_eof
93
- assert_equal(lex("* Foo\n*"),
94
- [[:UL_START, nil], [:LI_START, ""], [:TEXT, "Foo\n"], [:LI_END, ""], [:LI_START, ""], [:LI_END, ""], [:UL_END, ""], [false, false]])
95
- end
618
+ def test_toc_and_notoc
619
+ assert_equal([[:KEYWORD, "TOC", 0, 7], [false, false, 7, 0]], lex("__TOC__"))
620
+ assert_equal([[:KEYWORD, "NOTOC", 0, 9], [false, false, 9, 0]], lex("__NOTOC__"))
621
+ end
96
622
 
97
- private
98
- def lex(string)
99
- lexer = MediaWikiLexer.new
100
- lexer.tokenize(string)
101
- end
623
+ # This test should not pass. It works because of a bug in the MediaWiki lexer.
624
+ # The MediaWiki lexer builds an invalid tree for indented HTML.
625
+ # Indentation is treated as preformatted text, and the preformatted-text end tags are in
626
+ # the wrong place.
627
+ # The last PREINDENT_END should be right after the first :TAG_START for li.
628
+ def test_formatted_html
629
+ formatted_html = "\
630
+ <ul>
631
+ <li>
632
+ </li>
633
+ </ul>
634
+ "
635
+ assert_equal([[:PARA_START, "", 0, 0], [:TAG_START, "ul", 0, 5], [:PREINDENT_START, "", 5, 0], [:TEXT, " ", 5, 4], [:TAG_START, "li", 9, 5], [:PREINDENT_START, "", 14, 0], [:TEXT, " ", 14, 4], [:PREINDENT_END, "", 18, 0], [:TAG_END, "li", 18, 6], [:PREINDENT_END, "", 24, 0], [:TAG_END, "ul", 24, 5], [:TEXT, "\n", 29, 1], [:PARA_END, "", 30, 0], [:PREINDENT_END, "", 30, 0], [false, false, 30, 0]],
636
+ lex(formatted_html))
637
+ end
638
+
639
+ private
640
+
641
+ def lex(string)
642
+ lexer = MediaWikiLexer.new
643
+ lexer.tokenize(string)
644
+ end
102
645
 
103
646
  end