breakout_parser 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,55 @@
1
+ #ifdef RUBY_VERSION
2
+
3
+ #include "ruby.h"
4
+
5
+ void Init_breakout_parser();
6
+ VALUE method_parse(VALUE, VALUE);
7
+
8
+ VALUE breakout_parser = Qnil;
9
+
10
+ void Init_breakout_parser() { // extension init function: registers the BreakoutParser class and its "parse" singleton method
11
+ breakout_parser = rb_define_class("BreakoutParser",rb_cObject); // class BreakoutParser < Object, kept in the file-level VALUE
12
+ rb_define_singleton_method(breakout_parser, "parse", method_parse, 1); // BreakoutParser.parse(text) -> method_parse, taking 1 argument
13
+ }
14
+
15
+ extern char *buf, *bufptr;
16
+ extern char *in_buf, *in_pos;
17
+ extern size_t in_buf_len, bufsize;
18
+
19
+ VALUE method_parse(VALUE self, VALUE text) {
20
+   // BreakoutParser.parse(text): runs yyparse() over `text` and returns the bytes written to `buf` as a new Ruby String.
21
+   VALUE s;
22
+   char *p;
23
+   // coerce/validate the argument first: anything that is not String-like raises TypeError instead of being read as a String
24
+   StringValue(text);
25
+   // use the accessor macros rather than RSTRING(text)->ptr / ->len, which newer Ruby headers no longer expose
26
+   p = RSTRING_PTR(text);
27
+   in_buf_len = RSTRING_LEN(text);
28
+   // skip leading spaces, tabs, CRs and LFs so the lexer input starts at the first other character
29
+   while( in_buf_len > 0 && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')){
30
+     p++;
31
+     in_buf_len--;
32
+   }
33
+   in_buf = p;
34
+   in_pos = in_buf;
35
+   bufsize = 1 + in_buf_len + in_buf_len/3; // reserve 30% of in_buf size
36
+   if(bufsize<0x100) bufsize = 0x100;
37
+   buf = ALLOC_N(char, bufsize);
38
+   bufptr = buf;
39
+   // protect buf from GC (theoretically)
40
+   rb_iv_set(self,"@obj",Data_Wrap_Struct(rb_cData,NULL,NULL,buf));
41
+
42
+   yyparse();
43
+   yylex_destroy();
44
+   // printf("[.] yyparse() ended\n");
45
+   // make ruby string from our char[] data
46
+   s = rb_str_new(buf,bufptr-buf);
47
+   // cleanup: drop the wrapper object, free the output buffer and reset the shared globals
48
+   rb_iv_set(self,"@obj",Qnil);
49
+   xfree(buf);
50
+   buf = bufptr = NULL;
51
+   bufsize = 0;
52
+   return s;
53
+ }
54
+
55
+ #endif // ifdef RUBY_VERSION
Binary file
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require 'breakout_parser'
3
+ p BreakoutParser.parse("h1. aaa") # smoke test: print the inspected result of parsing a one-line "h1." sample
@@ -0,0 +1,3 @@
1
+ int yywrap(){ // NOTE(review): presumably the lex/flex end-of-input callback - confirm against the generated scanner
2
+ return 1; // unconditionally 1; in lex convention that would mean "no further input" (assumption, see note above)
3
+ }
Binary file
@@ -0,0 +1,6 @@
1
+ if RUBY_PLATFORM =~/(mswin|mingw)/i
2
+ # Fat binary gems, you make the Rockin' world go round
3
+ require "breakout_parser/win32-ruby#{RUBY_VERSION.sub(/\.\d+$/, '')}/breakout_parser"
4
+ else
5
+ require 'breakout_parser/breakout_parser'
6
+ end
@@ -0,0 +1,101 @@
1
+ describe 'BreakoutParser' do
2
+
3
+ describe "bad examples" do
4
+ Dir["examples/orig/*.bad"].sort.each do |fname|
5
+ it "should not die on #{fname} " do
6
+ data = File.read(fname)
7
+ parse_file(fname).size.should >= File.read(fname).strip.gsub(/\s+/,' ').size
8
+ end
9
+ end
10
+ end
11
+ describe "pending examples" do
12
+ Dir["examples/orig/*.pending"].sort.each do |fname|
13
+ it "should parse #{fname} "
14
+ end
15
+ end
16
+
17
+ describe "preparsed examples" do
18
+ Dir["examples/orig/*.txt"].sort.each do |fname|
19
+ bname = File.basename(fname)
20
+ it "should parse #{fname} " do
21
+ preparsed = File.read("examples/parsed/#{bname}")
22
+ preparsed = preparsed[3..-1] if preparsed[0..2] == '<p>'
23
+ preparsed = preparsed[0..-5] if preparsed[-4..-1] == '</p>'
24
+ preparsed.gsub!("&#8211;","-")
25
+ preparsed.gsub!("&#8212;","--")
26
+ preparsed.gsub!("&#8216;","'")
27
+ preparsed.gsub!("&#8217;","'")
28
+ preparsed.gsub!("&#8230;","...")
29
+ preparsed.gsub!("&#215;","x")
30
+ preparsed.gsub!("&#169;","(c)")
31
+ preparsed.gsub!("<br />\n","<br />")
32
+ preparsed.gsub!(/[ \t]+<br \/>/,"<br />")
33
+ preparsed.gsub!("\t"," ")
34
+ if preparsed['<hr />']
35
+ # find longest dash-line in source
36
+ dashline = File.read(fname).scan(/-+/).sort_by{ |x| -x.length }.first
37
+ preparsed.gsub!("</p>\n<hr />\n<p>","<br /><br />#{dashline}<br /><br />");
38
+ end
39
+
40
+ # preparsed.gsub!(/^<p>/,"");
41
+ # preparsed.gsub!(/<\/p>$/,"");
42
+ preparsed.gsub!("</pre>\n<ol>","</pre><br /><br /><ol>")
43
+ preparsed.gsub!(/<\/p>\s+<p>/,"<br /><br />")
44
+ preparsed.gsub!("</p>\n","<br /><br />")
45
+ preparsed.gsub!("<p>","<br /><br />")
46
+ preparsed.gsub!(/[\r\n]+ */," ")
47
+ preparsed.gsub!(/[ \t]{2,}/," ")
48
+
49
+ preparsed.gsub!("<del>","-")
50
+ preparsed.gsub!("</del>","-")
51
+ preparsed.gsub!(/<br \/>[ ]+/,"<br />")
52
+ preparsed.gsub!(/(<br \/>){2,}/,"<br /><br />")
53
+ # preparsed.gsub!("<br /><ol>","<ol>")
54
+ # preparsed.gsub!("<br /><ul>","<ul>")
55
+ # preparsed.gsub!("<br /><br /><ul>","<br /><ul>")
56
+
57
+ parsed = parse_file(fname)
58
+
59
+ # old parser not parses raw text urls
60
+ #parsed.gsub!(%r'<a href="([^<>"]+)">([^<>"]+)</a>',"\\1")
61
+
62
+ t1 = parsed
63
+ t2 = preparsed
64
+
65
+ [t1,t2].each do |t|
66
+ t.downcase!
67
+ t.gsub!(/(\s*<br \/>\s*)+/,' ')
68
+ t.gsub!(/\n\s*/,"\n")
69
+ # t.gsub!(/>[ \t]+</,"><")
70
+ t.gsub!(/>[ \t]+/,">")
71
+ t.gsub!(/[ \t]+</,"<")
72
+ t.gsub!(/[\r\n \t]+/," ")
73
+ t.strip!
74
+ end
75
+
76
+ if t1 != t2
77
+ # File.open("last-parsed.tmp","w"){ |f| f << parsed }
78
+ # File.open("last-preparsed.tmp","w"){ |f| f << preparsed }
79
+ pos = 0
80
+ pos += 1 while t1[0..pos] == t2[0..pos]
81
+ pos -= 5
82
+ pos = 0 if pos<0
83
+ t1[pos..-1].should == t2[pos..-1]
84
+ end
85
+ t1.should == t2
86
+ end
87
+ $n ||= 0
88
+ $n += 1
89
+ # break if $n == 1900
90
+ end
91
+ end
92
+
93
+ ###############################################################################
94
+ ###############################################################################
95
+ ###############################################################################
96
+
97
+ def parse_file fname
98
+ r = `cat #{fname} | ./parser`
99
+ r.strip
100
+ end
101
+ end
@@ -0,0 +1,549 @@
1
+ require 'breakout_parser'
2
+
3
+ describe 'BreakoutParser' do
4
+ def self.hex_string s
5
+ s.each_byte.to_a.map{ |c| "%02x" % c }.join
6
+ end
7
+ def hex_string s; self.class.hex_string(s); end
8
+
9
+ it 'converts \n to <br />' do
10
+ parse("aaa\nbbb").should match(%r"aaa ?<br /> ?bbb")
11
+ end
12
+
13
+ it "parses 1M file #1" do
14
+ s = 'a' * 1024 * 1024
15
+ parse(s).size.should == s.size
16
+ end
17
+
18
+ it "parses 1M file #2" do
19
+ s = 'a' + (' ' * 1024 * 1024) + 'b'
20
+ parse(s).should == 'a b'
21
+ end
22
+
23
+ it "parses 1M file #3" do
24
+ s = 'a ' * 1024 * 512
25
+ parse(s).size.should == s.strip.size
26
+ end
27
+
28
+ it "strips tailing spaces and newlines" do
29
+ parse("aaa ").should == "aaa"
30
+ parse("aaa\t\t\t\t\t\t").should == "aaa"
31
+ parse("aaa\r\r\r\r\r").should == "aaa"
32
+ parse("aaa\n\n\n\n\n").should == "aaa"
33
+ parse("aaa\r\n\r\n\r\n\r\n").should == "aaa"
34
+ parse("aaa\r\n\t \t \n \r \n \t \t\n\r ").should == "aaa"
35
+ end
36
+
37
+ it "strips leading spaces and newlines" do
38
+ parse(" aaa").should == "aaa"
39
+ parse("\t\t\t\t\t\taaa").should == "aaa"
40
+ parse("\r\r\r\r\raaa").should == "aaa"
41
+ parse("\n\n\n\n\naaa").should == "aaa"
42
+ parse("\r\n\r\n\r\n\r\naaa").should == "aaa"
43
+ parse("\r\n\t \t \n \r \n \t \t\n\r aaa").should == "aaa"
44
+ end
45
+
46
+ it "converts two or more \\n to single empty line" do
47
+ parse("aaa\n\nbbb").should == "aaa<br /><br />bbb"
48
+ parse("aaa\n \nbbb").should == "aaa<br /><br />bbb"
49
+ parse("aaa\n\n\nbbb").should == "aaa<br /><br />bbb"
50
+ parse("aaa\n \n \nbbb").should == "aaa<br /><br />bbb"
51
+ parse("aaa\r\n \r\n \r\nbbb").should == "aaa<br /><br />bbb"
52
+ parse("aaa\n \n\n \nbbb").should == "aaa<br /><br />bbb"
53
+ parse("aaa\n \n\n\n \nbbb").should == "aaa<br /><br />bbb"
54
+ parse("aaa\n\n\n\n\n\n\nbbb").should == "aaa<br /><br />bbb"
55
+ parse("aaa\r\n\r\n\r\nbbb").should == "aaa<br /><br />bbb"
56
+ end
57
+
58
+ ###############################################################################
59
+
60
+ describe "*bold*" do
61
+ it "only" do
62
+ parse("*bold*").should == '<strong>bold</strong>'
63
+ end
64
+ it "at beginning" do
65
+ parse("*bold*\nxxx").should == '<strong>bold</strong><br />xxx'
66
+ end
67
+ it "in the middle of text" do
68
+ parse("xxx *bold* yyy").should == 'xxx <strong>bold</strong> yyy'
69
+ end
70
+ it "parses *multiline\\nbold*" do
71
+ parse("*multiline\nbold*").should == "<strong>multiline<br />bold</strong>"
72
+ end
73
+ it "skips lone star inside bold block" do
74
+ parse("*aaa * bbb*").should == '<strong>aaa * bbb</strong>'
75
+ end
76
+ it "skips lone star" do
77
+ parse("aaa * bbb").should == 'aaa * bbb'
78
+ end
79
+ it "w/o closing tag" do
80
+ parse("*bold").should == '<strong>bold</strong>'
81
+ end
82
+ it "nesting1 w/o closing tags" do
83
+ parse("*bold1 *bold2").should == '<strong>bold1 <strong>bold2</strong></strong>'
84
+ end
85
+ it "nesting2 w/o closing tags" do
86
+ parse("*bold1 *bold2").should == '<strong>bold1 <strong>bold2</strong></strong>'
87
+ end
88
+
89
+ it "not parses '*.*'" do
90
+ parse("*.*").should == "*.*"
91
+ parse(" *.* ").should == "*.*"
92
+ parse("aaa *.* bbb").should == "aaa *.* bbb"
93
+ end
94
+
95
+ it "not parses '*.something'" do
96
+ parse("*.exe").should == "*.exe"
97
+ parse(" *.exe ").should == "*.exe"
98
+ parse("aaa *.exe bbb").should == "aaa *.exe bbb"
99
+ end
100
+
101
+ end
102
+
103
+ ###############################################################################
104
+
105
+ describe "_italic_" do
106
+ it "only" do
107
+ parse("_italic_").should == '<em>italic</em>'
108
+ end
109
+ it "at beginning" do
110
+ parse("_italic_\nxxx").should == '<em>italic</em><br />xxx'
111
+ end
112
+ it "in the middle of text" do
113
+ parse("xxx _italic_ yyy").should == 'xxx <em>italic</em> yyy'
114
+ end
115
+ it "parses _multiline\\nitalic_" do
116
+ parse("_multiline\nitalic_").should == "<em>multiline<br />italic</em>"
117
+ end
118
+ it "skips lone underscore inside italic block" do
119
+ parse("_aaa _ bbb_").should == '<em>aaa _ bbb</em>'
120
+ end
121
+ it "skips lone underscore" do
122
+ parse("aaa _ bbb").should == 'aaa _ bbb'
123
+ end
124
+ it "w/o closing tag" do
125
+ parse("_italic").should == '<em>italic</em>'
126
+ end
127
+ it "nesting1 w/o closing tags" do
128
+ parse("_italic1 _italic2").should == '<em>italic1 <em>italic2</em></em>'
129
+ end
130
+ it "nesting2 w/o closing tags" do
131
+ parse("_italic1 _italic2").should == '<em>italic1 <em>italic2</em></em>'
132
+ end
133
+ end
134
+
135
+ ###############################################################################
136
+
137
+ describe "combinations" do
138
+ it "bold in italic" do
139
+ s = "_aaa *bbb* ccc_"
140
+ parse(s).should == "<em>aaa <strong>bbb</strong> ccc</em>"
141
+ end
142
+ it "bold in italic - no closing1" do
143
+ s = "_aaa *bbb* ccc"
144
+ parse(s).should == "<em>aaa <strong>bbb</strong> ccc</em>"
145
+ end
146
+ it "bold in italic - no closing2" do
147
+ s = "_aaa *bbb ccc"
148
+ parse(s).should == "<em>aaa <strong>bbb ccc</strong></em>"
149
+ end
150
+
151
+ it "italic in bold" do
152
+ s = "*aaa _bbb_ ccc*"
153
+ parse(s).should == "<strong>aaa <em>bbb</em> ccc</strong>"
154
+ end
155
+ it "italic in bold - no closing1" do
156
+ s = "*aaa _bbb_ ccc"
157
+ parse(s).should == "<strong>aaa <em>bbb</em> ccc</strong>"
158
+ end
159
+ it "italic in bold - no closing2" do
160
+ s = "*aaa _bbb ccc"
161
+ parse(s).should == "<strong>aaa <em>bbb ccc</em></strong>"
162
+ end
163
+
164
+ {'ul' => '*', 'ol' => '#'}.each do |l,c|
165
+ it "raw text link inside #{l.upcase}> #1" do
166
+ s = "#{c} aaa http://www.ru"
167
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
168
+ end
169
+ it "raw text link inside #{l.upcase}> #2" do
170
+ s = "#{c} aaa http://www.ru\n#{c} bbb"
171
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li><li>bbb</li></#{l}>"
172
+ end
173
+ it "raw text link inside #{l.upcase}> #3" do
174
+ s = "#{c} http://www.ru"
175
+ parse(s).should == "<#{l}><li><a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
176
+ end
177
+ it "raw text link inside #{l.upcase}> #4" do
178
+ s = "#{c} aaa http://www.ru bbb"
179
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> bbb</li></#{l}>"
180
+ end
181
+ it "two links inside #{l.upcase}>" do
182
+ s = "#{c} aaa http://www.ru http://ya.ru bbb"
183
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> <a href=\"http://ya.ru\">http://ya.ru</a> bbb</li></#{l}>"
184
+ end
185
+ end
186
+ end
187
+
188
+ ###############################################################################
189
+
190
+ describe "unnumbered list" do
191
+ it "should work" do
192
+ parse("* a\n* b\n* c").should match(
193
+ %r"<ul><li>a</li><li>b</li><li>c</li></ul>"
194
+ )
195
+ end
196
+ it "two lists" do
197
+ s = "* a\n* b\n* c"
198
+ s = s + "\nxxx\n" + s
199
+ r = "<ul><li>a</li><li>b</li><li>c</li></ul>"
200
+ parse(s).should == "#{r}xxx<br />#{r}"
201
+ end
202
+ it "in middle of text when begins with space" do
203
+ parse("hello\n * a\n * b\n * c\nworld").should ==
204
+ "hello<br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
205
+ end
206
+ it "in middle of text" do
207
+ parse("hello\n* a\n* b\n* c\nworld").should ==
208
+ "hello<br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
209
+ end
210
+ it "after blank line" do
211
+ parse("hello\n\n * a\n * b\n * c\nworld").should ==
212
+ "hello<br /><br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
213
+ end
214
+ end
215
+
216
+ ###############################################################################
217
+
218
+ describe "numbered list" do
219
+ it "should work" do
220
+ parse("# a\n# b\n# c").should match(
221
+ %r"<ol><li>a</li><li>b</li><li>c</li></ol>"
222
+ )
223
+ end
224
+ it "two lists" do
225
+ s = "# a\n# b\n# c"
226
+ s = s + "\nxxx\n" + s
227
+ r = "<ol><li>a</li><li>b</li><li>c</li></ol>"
228
+ parse(s).should == "#{r}xxx<br />#{r}"
229
+ end
230
+ it "in middle of text when begins with space" do
231
+ parse("hello\n # a\n # b\n # c\nworld").should ==
232
+ "hello<br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
233
+ end
234
+ it "in middle of text" do
235
+ parse("hello\n# a\n# b\n# c\nworld").should ==
236
+ "hello<br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
237
+ end
238
+ it "after blank line" do
239
+ parse("hello\n\n # a\n # b\n # c\nworld").should ==
240
+ "hello<br /><br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
241
+ end
242
+ end
243
+
244
+ ###############################################################################
245
+
246
+ 1.upto(5) do |lvl|
247
+ describe "H#{lvl}" do
248
+ it "at the beginning" do
249
+ parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
250
+ end
251
+ it "after 1 line of text" do
252
+ parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
253
+ end
254
+ it "after 2 lines of text" do
255
+ parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
256
+ end
257
+ it "in middle of other words" do
258
+ parse("abcd defgh h#{lvl}. xxx yyy").should == "abcd defgh h#{lvl}. xxx yyy"
259
+ end
260
+ it "in middle of other lines" do
261
+ parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
262
+ end
263
+
264
+ it "converts spaces to underscores in id" do
265
+ parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx yyy z</h#{lvl}>"
266
+ end
267
+ it "keeps underscores in id" do
268
+ parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
269
+ end
270
+ it "keeps dashes in id" do
271
+ parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
272
+ end
273
+ it "keeps dots in id" do
274
+ parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
275
+ end
276
+
277
+ %w'Ъ ъ : ; , привет" \' ! < >'.each do |c|
278
+ it "converts id to hex if it contains \"#{c}\"" do
279
+ idhex = hex_string("xxx#{c}yyy")
280
+ parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
281
+ end
282
+ end
283
+
284
+ it "skips excess spaces" do
285
+ parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
286
+ end
287
+
288
+ it "thinks that \\r is EOL" do
289
+ parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
290
+ parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
291
+
292
+ parse("h#{lvl}. xxx\r yyy").sub(' yyy','yyy').should ==
293
+ "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
294
+ end
295
+ end
296
+ end
297
+
298
+ ###############################################################################
299
+
300
+ describe "raw text links" do
301
+ it "at the beginning" do
302
+ parse("http://www.ru").should == "<a href=\"http://www.ru\">http://www.ru</a>"
303
+ end
304
+ it "in middle of other words" do
305
+ parse("aaa bbb ccc http://www.ru ddd eee fff").should ==
306
+ "aaa bbb ccc <a href=\"http://www.ru\">http://www.ru</a> ddd eee fff"
307
+ end
308
+ it "in new line" do
309
+ parse("aaa bbb ccc\nhttp://www.ru\nddd eee fff").should match(
310
+ %r"aaa bbb ccc ?<br /> ?<a href=\"http://www.ru\">http://www.ru</a> ?<br /> ?ddd eee fff"
311
+ )
312
+ end
313
+ it "escapes '&' in link _text_" do
314
+ parse("http://www.ru/?a=1&b=2").should == "<a href=\"http://www.ru/?a=1&b=2\">http://www.ru/?a=1&amp;b=2</a>"
315
+ end
316
+
317
+ it "parses https://" do
318
+ parse("https://www.ru").should == "<a href=\"https://www.ru\">https://www.ru</a>"
319
+ end
320
+
321
+ %w', .'.each do |c|
322
+ it "stops parsing on \"#{c} \"" do
323
+ parse("http://www.ru#{c}").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
324
+ parse(" http://www.ru#{c} ").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
325
+ parse(" http://www.ru#{c} hello!").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
326
+ parse("xxx http://www.ru#{c} hello!").should == "xxx <a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
327
+ parse(" http://www.ru/#{c} hello!").should == "<a href=\"http://www.ru/\">http://www.ru/</a>#{c} hello!"
328
+ parse(" http://aaa.com#{c} http://bbb.com").should ==
329
+ "<a href=\"http://aaa.com\">http://aaa.com</a>#{c} <a href=\"http://bbb.com\">http://bbb.com</a>"
330
+ end
331
+ end
332
+ end
333
+
334
+ ###############################################################################
335
+
336
+ describe "#ticketNum ticket links" do
337
+ it "at the beginning" do
338
+ parse("#1234").should == '<a href="/spaces/test_space/tickets/1234">#1234</a>'
339
+ end
340
+ it "in middle of other words" do
341
+ parse("aaa bbb ccc #3476 ddd eee fff").should ==
342
+ 'aaa bbb ccc <a href="/spaces/test_space/tickets/3476">#3476</a> ddd eee fff'
343
+ end
344
+ it "in new line" do
345
+ parse("aaa bbb ccc\n#1234\nddd eee fff").should match(
346
+ %r|aaa bbb ccc ?<br /> ?<a href="/spaces/test_space/tickets/1234">#1234</a> ?<br /> ?ddd eee fff|
347
+ )
348
+ end
349
+ it "ignores non-digits" do
350
+ parse("#1234d").should == '#1234d'
351
+ parse("#xxx").should == '#xxx'
352
+ end
353
+ end
354
+
355
+ ###############################################################################
356
+
357
+ describe "<pre><code>..</code></pre>" do
358
+ it "works" do
359
+ s = <<-EOF
360
+ for ( n = 0; n < max_size && \
361
+ (c = getc( yyin )) != EOF && c != '\\n'; ++n ) \
362
+ buf[n] = (char) c; \
363
+
364
+ EOF
365
+
366
+ parse("<pre><code>#{s.strip}</code></pre>").should ==
367
+ "<pre><code>#{h(s.strip)}</code></pre>"
368
+
369
+ s = <<-EOF
370
+ while ( 1 < 2 ) do
371
+ puts "<b>12345\\t54321</b>"
372
+ // *bold* comment
373
+ // _italic_ comment
374
+ end
375
+ ---
376
+ * aaa
377
+ * bbb
378
+ * ccc
379
+
380
+ EOF
381
+ parse("<pre><code>#{s.strip}</code></pre>").should ==
382
+ "<pre><code>#{h(s.strip)}</code></pre>"
383
+ end
384
+ it "not parses *bold*" do
385
+ s = "<pre><code> *bold*</code></pre>"
386
+ parse(s).should == s
387
+ end
388
+ it "not parses _italic_" do
389
+ s = "<pre><code> _italic_</code></pre>"
390
+ parse(s).should == s
391
+ end
392
+ it "not parses UL lists" do
393
+ s = "<pre><code>\n * l1\n * l2\n * l3</code></pre>"
394
+ parse(s).should == s.sub("<code>\n","<code>")
395
+ end
396
+ it "not parses OL lists" do
397
+ s = "<pre><code>\n # l1\n # l2\n # l3</code></pre>"
398
+ parse(s).should == s.sub("<code>\n","<code>")
399
+ end
400
+ it "not parses H1..H5" do
401
+ 1.upto(5) do |i|
402
+ s = "<pre><code>\nh#{i}. zzzzzzz\n</code></pre>"
403
+ parse(s).should == "<pre><code>h#{i}. zzzzzzz</code></pre>"
404
+ end
405
+ end
406
+ it "not parses raw text links" do
407
+ s = "<pre><code>xxx http://www.ru yyy</code></pre>"
408
+ parse(s).should == s
409
+ s = "<pre><code>http://www.ru</code></pre>"
410
+ parse(s).should == s
411
+ end
412
+ it "keeps newlines" do
413
+ s = "<pre><code>aaa\nbbb</code></pre>"
414
+ parse(s).should == s
415
+ s = "<pre><code>aaa\n\nbbb\nccc</code></pre>"
416
+ parse(s).should == s
417
+ end
418
+
419
+ it "with no spaces between <pre> and <code>" do
420
+ s = "<pre><code>aaa</code></pre>"
421
+ parse(s).should == s
422
+ end
423
+
424
+ it "with spaces between <pre> and <code>" do
425
+ s = "<pre> <code>aaa</code> </pre>"
426
+ parse(s).should == s.tr(' ','')
427
+ end
428
+ it "with spaces between <pre> and <code> and inside" do
429
+ s = "<pre> <code> aaa bbb </code> </pre>"
430
+ parse(s).should == "<pre><code> aaa bbb</code></pre>"
431
+ end
432
+
433
+ it "w/o closing tags" do
434
+ s = "<pre><code>aaa"
435
+ parse(s).should match(%r"<pre><code>aaa\n?</code></pre>")
436
+ end
437
+
438
+ it "in middle of text" do
439
+ s = "xxx <pre><code>yyyy</code></pre> jjj"
440
+ parse(s).should == s
441
+ end
442
+
443
+ it "with 2 instances" do
444
+ s = "xxx <pre><code>yyyy</code></pre> <jjj> <pre><code>asdkjaslkd</code></pre> END"
445
+ parse(s).should == s.sub('<jjj>','&lt;jjj&gt;')
446
+ end
447
+
448
+ it "works with unicode" do
449
+ s = "привет <pre><code> жжж </code></pre> пока!"
450
+ parse(s).should match(%r|привет ?<pre><code> жжж</code></pre> ?пока!|)
451
+
452
+ s = 'абвгдеёжзийклмнопрстуфхцчшщьыъэюя'
453
+ parse(s).should == s
454
+
455
+ s = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯ'
456
+ parse(s).should == s
457
+
458
+ s = '☸☹☺☻☼☽☾☿'
459
+ parse(s).should == s
460
+ end
461
+
462
+ it "should escape lone closing tags" do
463
+ s = "</code></pre>"
464
+ parse(s).should == h(s)
465
+ end
466
+
467
+ it "should skip newlines and spaces at end" do
468
+ s = "<pre><code> aaa bbb ccc \n\n\n \t\n\n\n\r\n\r\n \t </code></pre>"
469
+ parse(s).should == "<pre><code> aaa bbb ccc</code></pre>"
470
+ end
471
+
472
+ it "escapes html chars" do
473
+ HTML_ESCAPE.each do |k,v|
474
+ parse("<pre><code>#{k}</code></pre>").should == "<pre><code>#{v}</code></pre>"
475
+ end
476
+ end
477
+ end
478
+
479
+ ###############################################################################
480
+
481
+ describe "Assembla Links" do
482
+ a = {}
483
+ a["wiki:Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
484
+ a["Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
485
+ a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#Ref">Name#Ref</a>'
486
+ a["#Ref"] = '<a href="#Ref" title="#Ref" class="wiki_link">#Ref</a>'
487
+ a["#привет"] = %Q|<a href="##{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
488
+ a["#with spc"] = %Q|<a href="#with__spc" title="#with spc" class="wiki_link">#with spc</a>|
489
+ a["#with__usc"] = %Q|<a href="#with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
490
+ a["#with--dsh"] = %Q|<a href="#with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
491
+ a["#with!xclm"] = %Q|<a href="##{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
492
+ a["#with&amp"] = %Q|<a href="##{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
493
+
494
+ a["ticket:234"] = '<a href="/spaces/test_space/tickets/234">#234</a>'
495
+ a["revision:1f4bdab77be696efd"] =
496
+ '<a href="http://code.assembla.com/test_space/git/changesets/1f4bdab77be696efd">revision:1f4bdab77be696efd</a>'
497
+ a["revision:12345"] =
498
+ '<a href="http://code.assembla.com/test_space/svn/changesets/12345">revision:12345</a>'
499
+ a["r:2345"] = '<a href="http://code.assembla.com/test_space/svn/changesets/2345">revision:2345</a>'
500
+ a["r:2345ef"] = '<a href="http://code.assembla.com/test_space/git/changesets/2345ef">revision:2345ef</a>'
501
+
502
+ a["url:http://www.ru"] = '<a href="http://www.ru">http://www.ru</a>'
503
+ a["url:https://www.ru"] = '<a href="https://www.ru">https://www.ru</a>'
504
+ a["url:www.ru"] = '<a href="http://www.ru">http://www.ru</a>'
505
+ a["url:www.ru/?a=1&b=2"] = '<a href="http://www.ru/?a=1&b=2">http://www.ru/?a=1&amp;b=2</a>'
506
+ a["url:ftp://www.ru"] = '<a href="ftp://www.ru">ftp://www.ru</a>'
507
+ a["url:/spaces/x2"] = '<a href="/spaces/x2">/spaces/x2</a>'
508
+
509
+ a.each do |k,v|
510
+ it "parses [[#{k}]]" do
511
+ parse("[[#{k}]]").should == v
512
+ end
513
+ it "parses [[#{k}|привет тест]]" do
514
+ parse("[[#{k}|привет тест]]").should == v.sub(/>.*</,">привет тест<")
515
+ end
516
+ it "parses [[#{k}|test & here]]" do
517
+ parse("[[#{k}|test & here]]").should == v.sub(/>.*</,">test &amp; here<")
518
+ end
519
+ end
520
+
521
+ it "keeps unknown link types" do
522
+ s = "[[zzz:xxx]]"
523
+ parse(s).should == s
524
+ s = "[[abcd:1234]]"
525
+ parse(s).should == s
526
+ s = "[[abcd::1234]] [[abcd:1234]] [[uri:www.ru]]"
527
+ parse(s).should == s
528
+ end
529
+
530
+ it "links to ExistingFile.txt"
531
+ it "links to NotExistingFile.txt"
532
+ it "links to ExistingImage.png"
533
+ it "links to NotExistingImage.png"
534
+ end
535
+
536
+ ###############################################################################
537
+ ###############################################################################
538
+ ###############################################################################
539
+
540
+ HTML_ESCAPE = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' }
541
+
542
+ def h s
543
+ s.to_s.gsub(/[&"><]/) { |special| HTML_ESCAPE[special] }
544
+ end
545
+
546
+ def parse s
547
+ BreakoutParser.parse(s).strip
548
+ end
549
+ end