breakout_parser 0.0.0-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,16 @@
1
+ wiki
2
+ pkg
3
+ lex.yy.c
4
+ *.tab.c
5
+ *.tab.h
6
+ *.tmp
7
+ ext/breakout_parser/*.so
8
+ *.o
9
+ *.def
10
+ *.exp
11
+ *.lib
12
+ *.pdb
13
+ *.obj
14
+ *.manifest
15
+ *.gemspec
16
+ Makefile
data/LICENSE ADDED
@@ -0,0 +1,39 @@
1
+ Copyright (c) 2010 Assembla, Inc.
2
+
3
+ SINGLETON DEVELOPMENT LICENSE
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ * This is a license for research and development use only, and does not
9
+ include a license for commercial use. If you desire a license for commercial
10
+ use, you must contact the copyright owner for a commercial use license,
11
+ which would supersede the terms of this license. "Commercial Use" means any
12
+ use (internal or external), copying, sublicensing or distribution
13
+ (internal or external), directly or indirectly, for commercial or strategic
14
+ gain or advantage, including use in internal operations or in providing
15
+ products or services to any third party. Research and development for
16
+ eventual commercial use is not "Commercial Use" so long as a commercial
17
+ use license is obtained prior to commercial use. Redistribution to others
18
+ for their research and development use is not "Commercial Use".
19
+
20
+ * Redistributions of source code must retain the above copyright notice,
21
+ this list of conditions and the following disclaimer.
22
+
23
+ * Redistributions in binary form must reproduce the above copyright notice,
24
+ this list of conditions and the following disclaimer in the documentation
25
+ and/or other materials provided with the distribution. Redistribution in
26
+ binary form does not require redistribution of source code.
27
+
28
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
38
+ THE POSSIBILITY OF SUCH DAMAGE.
39
+
data/Rakefile ADDED
@@ -0,0 +1,99 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+
7
+ def gen_tasks
8
+ Jeweler::Tasks.new do |gem|
9
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
10
+ gem.name = "breakout_parser"
11
+ gem.platform = ENV['PLATFORM'] if ENV['PLATFORM']
12
+ gem.summary = %Q{BreakoutParser}
13
+ gem.description = %Q{BreakoutParser}
14
+ gem.email = "zed.0xff@gmail.com"
15
+ gem.homepage = "http://assembla.com"
16
+ gem.authors = ["Andrey \"Zed\" Zaikin"]
17
+ gem.add_development_dependency "rspec", ">= 1.2.9"
18
+ gem.test_files.delete 'spec/parser_examples_spec.rb'
19
+ gem.files.delete_if{ |f| f[0..8] == 'examples/' }
20
+ gem.files.delete_if{ |f| f[0..4] == 'misc/' }
21
+ if gem.platform == 'ruby'
22
+ gem.files.include 'ext/**/*'
23
+ gem.files.delete "ext/breakout_parser/Makefile"
24
+ gem.files.delete_if{ |f| f[-3..-1] == '.so' }
25
+ else
26
+ gem.files.include 'lib/**/*.so'
27
+ gem.extensions = '.' # HACK: package no extensions
28
+ end
29
+ end
30
+ Jeweler::GemcutterTasks.new
31
+ end
32
+ gen_tasks
33
+ rescue LoadError
34
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
35
+ end
36
+
37
+ require 'spec/rake/spectask'
38
+ Spec::Rake::SpecTask.new(:spec) do |spec|
39
+ spec.libs << 'lib' << 'spec'
40
+ spec.spec_files = FileList['spec/**/*_spec.rb']
41
+ end
42
+
43
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
44
+ spec.libs << 'lib' << 'spec'
45
+ spec.pattern = 'spec/**/*_spec.rb'
46
+ spec.rcov = true
47
+ end
48
+
49
+ task :spec => :check_dependencies
50
+
51
+ task :default => :spec
52
+
53
+ require 'rake/rdoctask'
54
+ Rake::RDocTask.new do |rdoc|
55
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
56
+
57
+ rdoc.rdoc_dir = 'rdoc'
58
+ rdoc.title = "breakout_parser #{version}"
59
+ # rdoc.rdoc_files.include('README*')
60
+ rdoc.rdoc_files.include('lib/**/*.rb')
61
+ end
62
+
63
+ ######################
64
+
65
+ namespace :build do
66
+ desc "Build all gem variants"
67
+ task :all do
68
+ Rake::Task[ :build ].execute
69
+
70
+ @gems_to_push = []
71
+ @gems_to_push << Rake.application.jeweler.gemspec_helper.gem_path
72
+
73
+ gem = Rake.application.jeweler_tasks.gemspec
74
+ gem.files.delete_if{ |f| f[0..3] == 'ext/' }
75
+ gem.extensions = []
76
+ gem.files.include 'lib/**/*.so'
77
+
78
+ gem.original_platform = nil
79
+ gem.platform = 'x86-mingw32'
80
+ Rake::Task[ :build ].execute
81
+ @gems_to_push << Rake.application.jeweler.gemspec_helper.gem_path
82
+
83
+ gem.original_platform = nil
84
+ gem.platform = 'x86-mswin32'
85
+ Rake::Task[ :build ].execute
86
+ @gems_to_push << Rake.application.jeweler.gemspec_helper.gem_path
87
+ end
88
+ end
89
+
90
+ namespace 'gemcutter:release' do
91
+ desc "Release all gem variants"
92
+ task :all => 'build:all' do
93
+ @gems_to_push.each do |fname|
94
+ command = "gem push #{fname}"
95
+ puts "Executing #{command.inspect}:"
96
+ sh command
97
+ end
98
+ end
99
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,6 @@
1
+ if RUBY_PLATFORM =~/(mswin|mingw)/i
2
+ # Fat binary gems, you make the Rockin' world go round
3
+ require "breakout_parser/win32-ruby#{RUBY_VERSION.sub(/\.\d+$/, '')}/breakout_parser"
4
+ else
5
+ require 'breakout_parser/breakout_parser'
6
+ end
@@ -0,0 +1,101 @@
1
+ describe 'BreakoutParser' do
2
+
3
+ describe "bad examples" do
4
+ Dir["examples/orig/*.bad"].sort.each do |fname|
5
+ it "should not die on #{fname} " do
6
+ data = File.read(fname)
7
+ parse_file(fname).size.should >= File.read(fname).strip.gsub(/\s+/,' ').size
8
+ end
9
+ end
10
+ end
11
+ describe "pending examples" do
12
+ Dir["examples/orig/*.pending"].sort.each do |fname|
13
+ it "should parse #{fname} "
14
+ end
15
+ end
16
+
17
+ describe "preparsed examples" do
18
+ Dir["examples/orig/*.txt"].sort.each do |fname|
19
+ bname = File.basename(fname)
20
+ it "should parse #{fname} " do
21
+ preparsed = File.read("examples/parsed/#{bname}")
22
+ preparsed = preparsed[3..-1] if preparsed[0..2] == '<p>'
23
+ preparsed = preparsed[0..-5] if preparsed[-4..-1] == '</p>'
24
+ preparsed.gsub!("&#8211;","-")
25
+ preparsed.gsub!("&#8212;","--")
26
+ preparsed.gsub!("&#8216;","'")
27
+ preparsed.gsub!("&#8217;","'")
28
+ preparsed.gsub!("&#8230;","...")
29
+ preparsed.gsub!("&#215;","x")
30
+ preparsed.gsub!("&#169;","(c)")
31
+ preparsed.gsub!("<br />\n","<br />")
32
+ preparsed.gsub!(/[ \t]+<br \/>/,"<br />")
33
+ preparsed.gsub!("\t"," ")
34
+ if preparsed['<hr />']
35
+ # find longest dash-line in source
36
+ dashline = File.read(fname).scan(/-+/).sort_by{ |x| -x.length }.first
37
+ preparsed.gsub!("</p>\n<hr />\n<p>","<br /><br />#{dashline}<br /><br />");
38
+ end
39
+
40
+ # preparsed.gsub!(/^<p>/,"");
41
+ # preparsed.gsub!(/<\/p>$/,"");
42
+ preparsed.gsub!("</pre>\n<ol>","</pre><br /><br /><ol>")
43
+ preparsed.gsub!(/<\/p>\s+<p>/,"<br /><br />")
44
+ preparsed.gsub!("</p>\n","<br /><br />")
45
+ preparsed.gsub!("<p>","<br /><br />")
46
+ preparsed.gsub!(/[\r\n]+ */," ")
47
+ preparsed.gsub!(/[ \t]{2,}/," ")
48
+
49
+ preparsed.gsub!("<del>","-")
50
+ preparsed.gsub!("</del>","-")
51
+ preparsed.gsub!(/<br \/>[ ]+/,"<br />")
52
+ preparsed.gsub!(/(<br \/>){2,}/,"<br /><br />")
53
+ # preparsed.gsub!("<br /><ol>","<ol>")
54
+ # preparsed.gsub!("<br /><ul>","<ul>")
55
+ # preparsed.gsub!("<br /><br /><ul>","<br /><ul>")
56
+
57
+ parsed = parse_file(fname)
58
+
59
+ # the old parser does not parse raw-text URLs
60
+ #parsed.gsub!(%r'<a href="([^<>"]+)">([^<>"]+)</a>',"\\1")
61
+
62
+ t1 = parsed
63
+ t2 = preparsed
64
+
65
+ [t1,t2].each do |t|
66
+ t.downcase!
67
+ t.gsub!(/(\s*<br \/>\s*)+/,' ')
68
+ t.gsub!(/\n\s*/,"\n")
69
+ # t.gsub!(/>[ \t]+</,"><")
70
+ t.gsub!(/>[ \t]+/,">")
71
+ t.gsub!(/[ \t]+</,"<")
72
+ t.gsub!(/[\r\n \t]+/," ")
73
+ t.strip!
74
+ end
75
+
76
+ if t1 != t2
77
+ # File.open("last-parsed.tmp","w"){ |f| f << parsed }
78
+ # File.open("last-preparsed.tmp","w"){ |f| f << preparsed }
79
+ pos = 0
80
+ pos += 1 while t1[0..pos] == t2[0..pos]
81
+ pos -= 5
82
+ pos = 0 if pos<0
83
+ t1[pos..-1].should == t2[pos..-1]
84
+ end
85
+ t1.should == t2
86
+ end
87
+ $n ||= 0
88
+ $n += 1
89
+ # break if $n == 1900
90
+ end
91
+ end
92
+
93
+ ###############################################################################
94
+ ###############################################################################
95
+ ###############################################################################
96
+
97
# Feeds the contents of the file +fname+ to the ./parser executable (looked up
# in the current working directory) on its standard input and returns whatever
# the parser prints, with leading and trailing whitespace stripped.
#
# The file name is shell-escaped before being placed on the command line, so
# paths containing spaces or shell metacharacters are passed through literally
# instead of being split or interpreted by the shell.
def parse_file fname
  # Loaded here because this spec file has no top-level require section.
  require 'shellwords'

  # Redirect stdin straight from the file; no intermediate `cat` process needed.
  r = `./parser < #{Shellwords.escape(fname)}`
  r.strip
end
101
+ end
@@ -0,0 +1,549 @@
1
+ require 'breakout_parser'
2
+
3
+ describe 'BreakoutParser' do
4
# Returns the lowercase hexadecimal encoding of every byte of +s+, two digits
# per byte and with no separators (e.g. "ab" => "6162", "" => "").
# Defined at class level so it can be called while examples are being declared.
def self.hex_string s
  # 'H*' emits each byte high nibble first, i.e. the same text as "%02x" per byte.
  s.unpack('H*').first
end

# Instance-level delegator so the same helper can be called from inside examples.
def hex_string s; self.class.hex_string(s); end
8
+
9
+ it 'converts \n to <br />' do
10
+ parse("aaa\nbbb").should match(%r"aaa ?<br /> ?bbb")
11
+ end
12
+
13
+ it "parses 1M file #1" do
14
+ s = 'a' * 1024 * 1024
15
+ parse(s).size.should == s.size
16
+ end
17
+
18
+ it "parses 1M file #2" do
19
+ s = 'a' + (' ' * 1024 * 1024) + 'b'
20
+ parse(s).should == 'a b'
21
+ end
22
+
23
+ it "parses 1M file #3" do
24
+ s = 'a ' * 1024 * 512
25
+ parse(s).size.should == s.strip.size
26
+ end
27
+
28
+ it "strips tailing spaces and newlines" do
29
+ parse("aaa ").should == "aaa"
30
+ parse("aaa\t\t\t\t\t\t").should == "aaa"
31
+ parse("aaa\r\r\r\r\r").should == "aaa"
32
+ parse("aaa\n\n\n\n\n").should == "aaa"
33
+ parse("aaa\r\n\r\n\r\n\r\n").should == "aaa"
34
+ parse("aaa\r\n\t \t \n \r \n \t \t\n\r ").should == "aaa"
35
+ end
36
+
37
+ it "strips leading spaces and newlines" do
38
+ parse(" aaa").should == "aaa"
39
+ parse("\t\t\t\t\t\taaa").should == "aaa"
40
+ parse("\r\r\r\r\raaa").should == "aaa"
41
+ parse("\n\n\n\n\naaa").should == "aaa"
42
+ parse("\r\n\r\n\r\n\r\naaa").should == "aaa"
43
+ parse("\r\n\t \t \n \r \n \t \t\n\r aaa").should == "aaa"
44
+ end
45
+
46
+ it "converts two or more \\n to single empty line" do
47
+ parse("aaa\n\nbbb").should == "aaa<br /><br />bbb"
48
+ parse("aaa\n \nbbb").should == "aaa<br /><br />bbb"
49
+ parse("aaa\n\n\nbbb").should == "aaa<br /><br />bbb"
50
+ parse("aaa\n \n \nbbb").should == "aaa<br /><br />bbb"
51
+ parse("aaa\r\n \r\n \r\nbbb").should == "aaa<br /><br />bbb"
52
+ parse("aaa\n \n\n \nbbb").should == "aaa<br /><br />bbb"
53
+ parse("aaa\n \n\n\n \nbbb").should == "aaa<br /><br />bbb"
54
+ parse("aaa\n\n\n\n\n\n\nbbb").should == "aaa<br /><br />bbb"
55
+ parse("aaa\r\n\r\n\r\nbbb").should == "aaa<br /><br />bbb"
56
+ end
57
+
58
+ ###############################################################################
59
+
60
+ describe "*bold*" do
61
+ it "only" do
62
+ parse("*bold*").should == '<strong>bold</strong>'
63
+ end
64
+ it "at beginning" do
65
+ parse("*bold*\nxxx").should == '<strong>bold</strong><br />xxx'
66
+ end
67
+ it "in the middle of text" do
68
+ parse("xxx *bold* yyy").should == 'xxx <strong>bold</strong> yyy'
69
+ end
70
+ it "parses *multiline\\nbold*" do
71
+ parse("*multiline\nbold*").should == "<strong>multiline<br />bold</strong>"
72
+ end
73
+ it "skips lone star inside bold block" do
74
+ parse("*aaa * bbb*").should == '<strong>aaa * bbb</strong>'
75
+ end
76
+ it "skips lone star" do
77
+ parse("aaa * bbb").should == 'aaa * bbb'
78
+ end
79
+ it "w/o closing tag" do
80
+ parse("*bold").should == '<strong>bold</strong>'
81
+ end
82
+ it "nesting1 w/o closing tags" do
83
+ parse("*bold1 *bold2").should == '<strong>bold1 <strong>bold2</strong></strong>'
84
+ end
85
+ it "nesting2 w/o closing tags" do
86
+ parse("*bold1 *bold2").should == '<strong>bold1 <strong>bold2</strong></strong>'
87
+ end
88
+
89
+ it "not parses '*.*'" do
90
+ parse("*.*").should == "*.*"
91
+ parse(" *.* ").should == "*.*"
92
+ parse("aaa *.* bbb").should == "aaa *.* bbb"
93
+ end
94
+
95
+ it "not parses '*.something'" do
96
+ parse("*.exe").should == "*.exe"
97
+ parse(" *.exe ").should == "*.exe"
98
+ parse("aaa *.exe bbb").should == "aaa *.exe bbb"
99
+ end
100
+
101
+ end
102
+
103
+ ###############################################################################
104
+
105
+ describe "_italic_" do
106
+ it "only" do
107
+ parse("_italic_").should == '<em>italic</em>'
108
+ end
109
+ it "at beginning" do
110
+ parse("_italic_\nxxx").should == '<em>italic</em><br />xxx'
111
+ end
112
+ it "in the middle of text" do
113
+ parse("xxx _italic_ yyy").should == 'xxx <em>italic</em> yyy'
114
+ end
115
+ it "parses _multiline\\nitalic_" do
116
+ parse("_multiline\nitalic_").should == "<em>multiline<br />italic</em>"
117
+ end
118
+ it "skips lone underscore inside italic block" do
119
+ parse("_aaa _ bbb_").should == '<em>aaa _ bbb</em>'
120
+ end
121
+ it "skips lone underscore" do
122
+ parse("aaa _ bbb").should == 'aaa _ bbb'
123
+ end
124
+ it "w/o closing tag" do
125
+ parse("_italic").should == '<em>italic</em>'
126
+ end
127
+ it "nesting1 w/o closing tags" do
128
+ parse("_italic1 _italic2").should == '<em>italic1 <em>italic2</em></em>'
129
+ end
130
+ it "nesting2 w/o closing tags" do
131
+ parse("_italic1 _italic2").should == '<em>italic1 <em>italic2</em></em>'
132
+ end
133
+ end
134
+
135
+ ###############################################################################
136
+
137
+ describe "combinations" do
138
+ it "bold in italic" do
139
+ s = "_aaa *bbb* ccc_"
140
+ parse(s).should == "<em>aaa <strong>bbb</strong> ccc</em>"
141
+ end
142
+ it "bold in italic - no closing1" do
143
+ s = "_aaa *bbb* ccc"
144
+ parse(s).should == "<em>aaa <strong>bbb</strong> ccc</em>"
145
+ end
146
+ it "bold in italic - no closing2" do
147
+ s = "_aaa *bbb ccc"
148
+ parse(s).should == "<em>aaa <strong>bbb ccc</strong></em>"
149
+ end
150
+
151
+ it "italic in bold" do
152
+ s = "*aaa _bbb_ ccc*"
153
+ parse(s).should == "<strong>aaa <em>bbb</em> ccc</strong>"
154
+ end
155
+ it "italic in bold - no closing1" do
156
+ s = "*aaa _bbb_ ccc"
157
+ parse(s).should == "<strong>aaa <em>bbb</em> ccc</strong>"
158
+ end
159
+ it "italic in bold - no closing2" do
160
+ s = "*aaa _bbb ccc"
161
+ parse(s).should == "<strong>aaa <em>bbb ccc</em></strong>"
162
+ end
163
+
164
+ {'ul' => '*', 'ol' => '#'}.each do |l,c|
165
+ it "raw text link inside #{l.upcase}> #1" do
166
+ s = "#{c} aaa http://www.ru"
167
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
168
+ end
169
+ it "raw text link inside #{l.upcase}> #2" do
170
+ s = "#{c} aaa http://www.ru\n#{c} bbb"
171
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li><li>bbb</li></#{l}>"
172
+ end
173
+ it "raw text link inside #{l.upcase}> #3" do
174
+ s = "#{c} http://www.ru"
175
+ parse(s).should == "<#{l}><li><a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
176
+ end
177
+ it "raw text link inside #{l.upcase}> #4" do
178
+ s = "#{c} aaa http://www.ru bbb"
179
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> bbb</li></#{l}>"
180
+ end
181
+ it "two links inside #{l.upcase}>" do
182
+ s = "#{c} aaa http://www.ru http://ya.ru bbb"
183
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> <a href=\"http://ya.ru\">http://ya.ru</a> bbb</li></#{l}>"
184
+ end
185
+ end
186
+ end
187
+
188
+ ###############################################################################
189
+
190
+ describe "unnumbered list" do
191
+ it "should work" do
192
+ parse("* a\n* b\n* c").should match(
193
+ %r"<ul><li>a</li><li>b</li><li>c</li></ul>"
194
+ )
195
+ end
196
+ it "two lists" do
197
+ s = "* a\n* b\n* c"
198
+ s = s + "\nxxx\n" + s
199
+ r = "<ul><li>a</li><li>b</li><li>c</li></ul>"
200
+ parse(s).should == "#{r}xxx<br />#{r}"
201
+ end
202
+ it "in middle of text when begins with space" do
203
+ parse("hello\n * a\n * b\n * c\nworld").should ==
204
+ "hello<br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
205
+ end
206
+ it "in middle of text" do
207
+ parse("hello\n* a\n* b\n* c\nworld").should ==
208
+ "hello<br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
209
+ end
210
+ it "after blank line" do
211
+ parse("hello\n\n * a\n * b\n * c\nworld").should ==
212
+ "hello<br /><br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
213
+ end
214
+ end
215
+
216
+ ###############################################################################
217
+
218
+ describe "numbered list" do
219
+ it "should work" do
220
+ parse("# a\n# b\n# c").should match(
221
+ %r"<ol><li>a</li><li>b</li><li>c</li></ol>"
222
+ )
223
+ end
224
+ it "two lists" do
225
+ s = "# a\n# b\n# c"
226
+ s = s + "\nxxx\n" + s
227
+ r = "<ol><li>a</li><li>b</li><li>c</li></ol>"
228
+ parse(s).should == "#{r}xxx<br />#{r}"
229
+ end
230
+ it "in middle of text when begins with space" do
231
+ parse("hello\n # a\n # b\n # c\nworld").should ==
232
+ "hello<br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
233
+ end
234
+ it "in middle of text" do
235
+ parse("hello\n# a\n# b\n# c\nworld").should ==
236
+ "hello<br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
237
+ end
238
+ it "after blank line" do
239
+ parse("hello\n\n # a\n # b\n # c\nworld").should ==
240
+ "hello<br /><br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
241
+ end
242
+ end
243
+
244
+ ###############################################################################
245
+
246
+ 1.upto(5) do |lvl|
247
+ describe "H#{lvl}" do
248
+ it "at the beginning" do
249
+ parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
250
+ end
251
+ it "after 1 line of text" do
252
+ parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
253
+ end
254
+ it "after 2 lines of text" do
255
+ parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
256
+ end
257
+ it "in middle of other words" do
258
+ parse("abcd defgh h#{lvl}. xxx yyy").should == "abcd defgh h#{lvl}. xxx yyy"
259
+ end
260
+ it "in middle of other lines" do
261
+ parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
262
+ end
263
+
264
+ it "converts spaces to underscores in id" do
265
+ parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx yyy z</h#{lvl}>"
266
+ end
267
+ it "keeps underscores in id" do
268
+ parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
269
+ end
270
+ it "keeps dashes in id" do
271
+ parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
272
+ end
273
+ it "keeps dots in id" do
274
+ parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
275
+ end
276
+
277
+ %w'Ъ ъ : ; , привет" \' ! < >'.each do |c|
278
+ it "converts id to hex if it contains \"#{c}\"" do
279
+ idhex = hex_string("xxx#{c}yyy")
280
+ parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
281
+ end
282
+ end
283
+
284
+ it "skips excess spaces" do
285
+ parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
286
+ end
287
+
288
+ it "thinks that \\r is EOL" do
289
+ parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
290
+ parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
291
+
292
+ parse("h#{lvl}. xxx\r yyy").sub(' yyy','yyy').should ==
293
+ "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
294
+ end
295
+ end
296
+ end
297
+
298
+ ###############################################################################
299
+
300
+ describe "raw text links" do
301
+ it "at the beginning" do
302
+ parse("http://www.ru").should == "<a href=\"http://www.ru\">http://www.ru</a>"
303
+ end
304
+ it "in middle of other words" do
305
+ parse("aaa bbb ccc http://www.ru ddd eee fff").should ==
306
+ "aaa bbb ccc <a href=\"http://www.ru\">http://www.ru</a> ddd eee fff"
307
+ end
308
+ it "in new line" do
309
+ parse("aaa bbb ccc\nhttp://www.ru\nddd eee fff").should match(
310
+ %r"aaa bbb ccc ?<br /> ?<a href=\"http://www.ru\">http://www.ru</a> ?<br /> ?ddd eee fff"
311
+ )
312
+ end
313
+ it "escapes '&' in link _text_" do
314
+ parse("http://www.ru/?a=1&b=2").should == "<a href=\"http://www.ru/?a=1&b=2\">http://www.ru/?a=1&amp;b=2</a>"
315
+ end
316
+
317
+ it "parses https://" do
318
+ parse("https://www.ru").should == "<a href=\"https://www.ru\">https://www.ru</a>"
319
+ end
320
+
321
+ %w', .'.each do |c|
322
+ it "stops parsing on \"#{c} \"" do
323
+ parse("http://www.ru#{c}").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
324
+ parse(" http://www.ru#{c} ").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
325
+ parse(" http://www.ru#{c} hello!").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
326
+ parse("xxx http://www.ru#{c} hello!").should == "xxx <a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
327
+ parse(" http://www.ru/#{c} hello!").should == "<a href=\"http://www.ru/\">http://www.ru/</a>#{c} hello!"
328
+ parse(" http://aaa.com#{c} http://bbb.com").should ==
329
+ "<a href=\"http://aaa.com\">http://aaa.com</a>#{c} <a href=\"http://bbb.com\">http://bbb.com</a>"
330
+ end
331
+ end
332
+ end
333
+
334
+ ###############################################################################
335
+
336
+ describe "#ticketNum ticket links" do
337
+ it "at the beginning" do
338
+ parse("#1234").should == '<a href="/spaces/test_space/tickets/1234">#1234</a>'
339
+ end
340
+ it "in middle of other words" do
341
+ parse("aaa bbb ccc #3476 ddd eee fff").should ==
342
+ 'aaa bbb ccc <a href="/spaces/test_space/tickets/3476">#3476</a> ddd eee fff'
343
+ end
344
+ it "in new line" do
345
+ parse("aaa bbb ccc\n#1234\nddd eee fff").should match(
346
+ %r|aaa bbb ccc ?<br /> ?<a href="/spaces/test_space/tickets/1234">#1234</a> ?<br /> ?ddd eee fff|
347
+ )
348
+ end
349
+ it "ignores non-digits" do
350
+ parse("#1234d").should == '#1234d'
351
+ parse("#xxx").should == '#xxx'
352
+ end
353
+ end
354
+
355
+ ###############################################################################
356
+
357
+ describe "<pre><code>..</code></pre>" do
358
+ it "works" do
359
+ s = <<-EOF
360
+ for ( n = 0; n < max_size && \
361
+ (c = getc( yyin )) != EOF && c != '\\n'; ++n ) \
362
+ buf[n] = (char) c; \
363
+
364
+ EOF
365
+
366
+ parse("<pre><code>#{s.strip}</code></pre>").should ==
367
+ "<pre><code>#{h(s.strip)}</code></pre>"
368
+
369
+ s = <<-EOF
370
+ while ( 1 < 2 ) do
371
+ puts "<b>12345\\t54321</b>"
372
+ // *bold* comment
373
+ // _italic_ comment
374
+ end
375
+ ---
376
+ * aaa
377
+ * bbb
378
+ * ccc
379
+
380
+ EOF
381
+ parse("<pre><code>#{s.strip}</code></pre>").should ==
382
+ "<pre><code>#{h(s.strip)}</code></pre>"
383
+ end
384
+ it "not parses *bold*" do
385
+ s = "<pre><code> *bold*</code></pre>"
386
+ parse(s).should == s
387
+ end
388
+ it "not parses _italic_" do
389
+ s = "<pre><code> _italic_</code></pre>"
390
+ parse(s).should == s
391
+ end
392
+ it "not parses UL lists" do
393
+ s = "<pre><code>\n * l1\n * l2\n * l3</code></pre>"
394
+ parse(s).should == s.sub("<code>\n","<code>")
395
+ end
396
+ it "not parses OL lists" do
397
+ s = "<pre><code>\n # l1\n # l2\n # l3</code></pre>"
398
+ parse(s).should == s.sub("<code>\n","<code>")
399
+ end
400
+ it "not parses H1..H5" do
401
+ 1.upto(5) do |i|
402
+ s = "<pre><code>\nh#{i}. zzzzzzz\n</code></pre>"
403
+ parse(s).should == "<pre><code>h#{i}. zzzzzzz</code></pre>"
404
+ end
405
+ end
406
+ it "not parses raw text links" do
407
+ s = "<pre><code>xxx http://www.ru yyy</code></pre>"
408
+ parse(s).should == s
409
+ s = "<pre><code>http://www.ru</code></pre>"
410
+ parse(s).should == s
411
+ end
412
+ it "keeps newlines" do
413
+ s = "<pre><code>aaa\nbbb</code></pre>"
414
+ parse(s).should == s
415
+ s = "<pre><code>aaa\n\nbbb\nccc</code></pre>"
416
+ parse(s).should == s
417
+ end
418
+
419
+ it "with no spaces between <pre> and <code>" do
420
+ s = "<pre><code>aaa</code></pre>"
421
+ parse(s).should == s
422
+ end
423
+
424
+ it "with spaces between <pre> and <code>" do
425
+ s = "<pre> <code>aaa</code> </pre>"
426
+ parse(s).should == s.tr(' ','')
427
+ end
428
+ it "with spaces between <pre> and <code> and inside" do
429
+ s = "<pre> <code> aaa bbb </code> </pre>"
430
+ parse(s).should == "<pre><code> aaa bbb</code></pre>"
431
+ end
432
+
433
+ it "w/o closing tags" do
434
+ s = "<pre><code>aaa"
435
+ parse(s).should match(%r"<pre><code>aaa\n?</code></pre>")
436
+ end
437
+
438
+ it "in middle of text" do
439
+ s = "xxx <pre><code>yyyy</code></pre> jjj"
440
+ parse(s).should == s
441
+ end
442
+
443
+ it "with 2 instances" do
444
+ s = "xxx <pre><code>yyyy</code></pre> <jjj> <pre><code>asdkjaslkd</code></pre> END"
445
+ parse(s).should == s.sub('<jjj>','&lt;jjj&gt;')
446
+ end
447
+
448
+ it "works with unicode" do
449
+ s = "привет <pre><code> жжж </code></pre> пока!"
450
+ parse(s).should match(%r|привет ?<pre><code> жжж</code></pre> ?пока!|)
451
+
452
+ s = 'абвгдеёжзийклмнопрстуфхцчшщьыъэюя'
453
+ parse(s).should == s
454
+
455
+ s = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯ'
456
+ parse(s).should == s
457
+
458
+ s = '☸☹☺☻☼☽☾☿'
459
+ parse(s).should == s
460
+ end
461
+
462
+ it "should escape lone closing tags" do
463
+ s = "</code></pre>"
464
+ parse(s).should == h(s)
465
+ end
466
+
467
+ it "should skip newlines and spaces at end" do
468
+ s = "<pre><code> aaa bbb ccc \n\n\n \t\n\n\n\r\n\r\n \t </code></pre>"
469
+ parse(s).should == "<pre><code> aaa bbb ccc</code></pre>"
470
+ end
471
+
472
+ it "escapes html chars" do
473
+ HTML_ESCAPE.each do |k,v|
474
+ parse("<pre><code>#{k}</code></pre>").should == "<pre><code>#{v}</code></pre>"
475
+ end
476
+ end
477
+ end
478
+
479
+ ###############################################################################
480
+
481
+ describe "Assembla Links" do
482
+ a = {}
483
+ a["wiki:Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
484
+ a["Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
485
+ a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#Ref">Name#Ref</a>'
486
+ a["#Ref"] = '<a href="#Ref" title="#Ref" class="wiki_link">#Ref</a>'
487
+ a["#привет"] = %Q|<a href="##{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
488
+ a["#with spc"] = %Q|<a href="#with__spc" title="#with spc" class="wiki_link">#with spc</a>|
489
+ a["#with__usc"] = %Q|<a href="#with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
490
+ a["#with--dsh"] = %Q|<a href="#with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
491
+ a["#with!xclm"] = %Q|<a href="##{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
492
+ a["#with&amp"] = %Q|<a href="##{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
493
+
494
+ a["ticket:234"] = '<a href="/spaces/test_space/tickets/234">#234</a>'
495
+ a["revision:1f4bdab77be696efd"] =
496
+ '<a href="http://code.assembla.com/test_space/git/changesets/1f4bdab77be696efd">revision:1f4bdab77be696efd</a>'
497
+ a["revision:12345"] =
498
+ '<a href="http://code.assembla.com/test_space/svn/changesets/12345">revision:12345</a>'
499
+ a["r:2345"] = '<a href="http://code.assembla.com/test_space/svn/changesets/2345">revision:2345</a>'
500
+ a["r:2345ef"] = '<a href="http://code.assembla.com/test_space/git/changesets/2345ef">revision:2345ef</a>'
501
+
502
+ a["url:http://www.ru"] = '<a href="http://www.ru">http://www.ru</a>'
503
+ a["url:https://www.ru"] = '<a href="https://www.ru">https://www.ru</a>'
504
+ a["url:www.ru"] = '<a href="http://www.ru">http://www.ru</a>'
505
+ a["url:www.ru/?a=1&b=2"] = '<a href="http://www.ru/?a=1&b=2">http://www.ru/?a=1&amp;b=2</a>'
506
+ a["url:ftp://www.ru"] = '<a href="ftp://www.ru">ftp://www.ru</a>'
507
+ a["url:/spaces/x2"] = '<a href="/spaces/x2">/spaces/x2</a>'
508
+
509
+ a.each do |k,v|
510
+ it "parses [[#{k}]]" do
511
+ parse("[[#{k}]]").should == v
512
+ end
513
+ it "parses [[#{k}|привет тест]]" do
514
+ parse("[[#{k}|привет тест]]").should == v.sub(/>.*</,">привет тест<")
515
+ end
516
+ it "parses [[#{k}|test & here]]" do
517
+ parse("[[#{k}|test & here]]").should == v.sub(/>.*</,">test &amp; here<")
518
+ end
519
+ end
520
+
521
+ it "keeps unknown link types" do
522
+ s = "[[zzz:xxx]]"
523
+ parse(s).should == s
524
+ s = "[[abcd:1234]]"
525
+ parse(s).should == s
526
+ s = "[[abcd::1234]] [[abcd:1234]] [[uri:www.ru]]"
527
+ parse(s).should == s
528
+ end
529
+
530
+ it "links to ExistingFile.txt"
531
+ it "links to NotExistingFile.txt"
532
+ it "links to ExistingImage.png"
533
+ it "links to NotExistingImage.png"
534
+ end
535
+
536
+ ###############################################################################
537
+ ###############################################################################
538
+ ###############################################################################
539
+
540
# Lookup table from each HTML-special character to its entity.
# Frozen so that no example can accidentally mutate the table shared by all specs.
HTML_ESCAPE = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' }.freeze

# Returns +s+ (converted with to_s, so nil becomes "") with every &, ", > and <
# replaced by the corresponding entity from HTML_ESCAPE. Other characters,
# including the single quote, are left as they are.
def h s
  s.to_s.gsub(/[&"><]/) { |special| HTML_ESCAPE[special] }
end
545
+
546
# Runs +s+ through BreakoutParser.parse and returns the result with leading
# and trailing whitespace stripped.
def parse(s)
  html = BreakoutParser.parse(s)
  html.strip
end
549
+ end
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: breakout_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: x86-mswin32
6
+ authors:
7
+ - Andrey "Zed" Zaikin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-19 00:00:00 +05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: BreakoutParser
26
+ email: zed.0xff@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ files:
34
+ - .gitignore
35
+ - LICENSE
36
+ - Rakefile
37
+ - VERSION
38
+ - lib/breakout_parser.rb
39
+ - lib/breakout_parser/win32-ruby1.8/breakout_parser.so
40
+ - spec/parser_examples_spec.rb
41
+ - spec/parser_spec.rb
42
+ has_rdoc: true
43
+ homepage: http://assembla.com
44
+ licenses: []
45
+
46
+ post_install_message:
47
+ rdoc_options:
48
+ - --charset=UTF-8
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: "0"
62
+ version:
63
+ requirements: []
64
+
65
+ rubyforge_project:
66
+ rubygems_version: 1.3.5
67
+ signing_key:
68
+ specification_version: 3
69
+ summary: BreakoutParser
70
+ test_files:
71
+ - spec/parser_spec.rb