breakout_parser 0.0.0-x86-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,16 @@
1
+ wiki
2
+ pkg
3
+ lex.yy.c
4
+ *.tab.c
5
+ *.tab.h
6
+ *.tmp
7
+ ext/breakout_parser/*.so
8
+ *.o
9
+ *.def
10
+ *.exp
11
+ *.lib
12
+ *.pdb
13
+ *.obj
14
+ *.manifest
15
+ *.gemspec
16
+ Makefile
data/LICENSE ADDED
@@ -0,0 +1,39 @@
1
+ Copyright (c) 2010 Assembla, Inc.
2
+
3
+ SINGLETON DEVELOPMENT LICENSE
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ * This is a license for research and development use only, and does not
9
+ include a license for commercial use. If you desire a license for commercial
10
+ use, you must contact the copyright owner for a commercial use license,
11
+ which would supersede the terms of this license. "Commercial Use" means any
12
+ use (internal or external), copying, sublicensing or distribution
13
+ (internal or external), directly or indirectly, for commercial or strategic
14
+ gain or advantage, including use in internal operations or in providing
15
+ products or services to any third party. Research and development for
16
+ eventual commercial use is not "Commercial Use" so long as a commercial
17
+ use license is obtained prior to commercial use. Redistribution to others
18
+ for their research and development use is not "Commercial Use".
19
+
20
+ * Redistributions of source code must retain the above copyright notice,
21
+ this list of conditions and the following disclaimer.
22
+
23
+ * Redistributions in binary form must reproduce the above copyright notice,
24
+ this list of conditions and the following disclaimer in the documentation
25
+ and/or other materials provided with the distribution. Redistribution in
26
+ binary form does not require redistribution of source code.
27
+
28
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
38
+ THE POSSIBILITY OF SUCH DAMAGE.
39
+
data/Rakefile ADDED
@@ -0,0 +1,99 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+
7
+ def gen_tasks
8
+ Jeweler::Tasks.new do |gem|
9
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
10
+ gem.name = "breakout_parser"
11
+ gem.platform = ENV['PLATFORM'] if ENV['PLATFORM']
12
+ gem.summary = %Q{BreakoutParser}
13
+ gem.description = %Q{BreakoutParser}
14
+ gem.email = "zed.0xff@gmail.com"
15
+ gem.homepage = "http://assembla.com"
16
+ gem.authors = ["Andrey \"Zed\" Zaikin"]
17
+ gem.add_development_dependency "rspec", ">= 1.2.9"
18
+ gem.test_files.delete 'spec/parser_examples_spec.rb'
19
+ gem.files.delete_if{ |f| f[0..8] == 'examples/' }
20
+ gem.files.delete_if{ |f| f[0..4] == 'misc/' }
21
+ if gem.platform == 'ruby'
22
+ gem.files.include 'ext/**/*'
23
+ gem.files.delete "ext/breakout_parser/Makefile"
24
+ gem.files.delete_if{ |f| f[-3..-1] == '.so' }
25
+ else
26
+ gem.files.include 'lib/**/*.so'
27
+ gem.extensions = '.' # HACK: do not package any extensions
28
+ end
29
+ end
30
+ Jeweler::GemcutterTasks.new
31
+ end
32
+ gen_tasks
33
+ rescue LoadError
34
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
35
+ end
36
+
37
+ require 'spec/rake/spectask'
38
+ Spec::Rake::SpecTask.new(:spec) do |spec|
39
+ spec.libs << 'lib' << 'spec'
40
+ spec.spec_files = FileList['spec/**/*_spec.rb']
41
+ end
42
+
43
+ Spec::Rake::SpecTask.new(:rcov) do |spec|
44
+ spec.libs << 'lib' << 'spec'
45
+ spec.pattern = 'spec/**/*_spec.rb'
46
+ spec.rcov = true
47
+ end
48
+
49
+ task :spec => :check_dependencies
50
+
51
+ task :default => :spec
52
+
53
+ require 'rake/rdoctask'
54
+ Rake::RDocTask.new do |rdoc|
55
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
56
+
57
+ rdoc.rdoc_dir = 'rdoc'
58
+ rdoc.title = "breakout_parser #{version}"
59
+ # rdoc.rdoc_files.include('README*')
60
+ rdoc.rdoc_files.include('lib/**/*.rb')
61
+ end
62
+
63
+ ######################
64
+
65
+ namespace :build do
66
+ desc "Build all gem variants"
67
+ task :all do
68
+ Rake::Task[ :build ].execute
69
+
70
+ @gems_to_push = []
71
+ @gems_to_push << Rake.application.jeweler.gemspec_helper.gem_path
72
+
73
+ gem = Rake.application.jeweler_tasks.gemspec
74
+ gem.files.delete_if{ |f| f[0..3] == 'ext/' }
75
+ gem.extensions = []
76
+ gem.files.include 'lib/**/*.so'
77
+
78
+ gem.original_platform = nil
79
+ gem.platform = 'x86-mingw32'
80
+ Rake::Task[ :build ].execute
81
+ @gems_to_push << Rake.application.jeweler.gemspec_helper.gem_path
82
+
83
+ gem.original_platform = nil
84
+ gem.platform = 'x86-mswin32'
85
+ Rake::Task[ :build ].execute
86
+ @gems_to_push << Rake.application.jeweler.gemspec_helper.gem_path
87
+ end
88
+ end
89
+
90
+ namespace 'gemcutter:release' do
91
+ desc "Release all gem variants"
92
+ task :all => 'build:all' do
93
+ @gems_to_push.each do |fname|
94
+ command = "gem push #{fname}"
95
+ puts "Executing #{command.inspect}:"
96
+ sh command
97
+ end
98
+ end
99
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.0
@@ -0,0 +1,6 @@
1
+ if RUBY_PLATFORM =~/(mswin|mingw)/i
2
+ # Fat binary gem: load the precompiled extension for this Ruby's major.minor version (e.g. win32-ruby1.8)
3
+ require "breakout_parser/win32-ruby#{RUBY_VERSION.sub(/\.\d+$/, '')}/breakout_parser"
4
+ else
5
+ require 'breakout_parser/breakout_parser'
6
+ end
@@ -0,0 +1,101 @@
1
+ describe 'BreakoutParser' do
2
+
3
+ describe "bad examples" do
4
+ Dir["examples/orig/*.bad"].sort.each do |fname|
5
+ it "should not die on #{fname} " do
6
+ data = File.read(fname)
7
+ parse_file(fname).size.should >= File.read(fname).strip.gsub(/\s+/,' ').size
8
+ end
9
+ end
10
+ end
11
+ describe "pending examples" do
12
+ Dir["examples/orig/*.pending"].sort.each do |fname|
13
+ it "should parse #{fname} "
14
+ end
15
+ end
16
+
17
+ describe "preparsed examples" do
18
+ Dir["examples/orig/*.txt"].sort.each do |fname|
19
+ bname = File.basename(fname)
20
+ it "should parse #{fname} " do
21
+ preparsed = File.read("examples/parsed/#{bname}")
22
+ preparsed = preparsed[3..-1] if preparsed[0..2] == '<p>'
23
+ preparsed = preparsed[0..-5] if preparsed[-4..-1] == '</p>'
24
+ preparsed.gsub!("&#8211;","-")
25
+ preparsed.gsub!("&#8212;","--")
26
+ preparsed.gsub!("&#8216;","'")
27
+ preparsed.gsub!("&#8217;","'")
28
+ preparsed.gsub!("&#8230;","...")
29
+ preparsed.gsub!("&#215;","x")
30
+ preparsed.gsub!("&#169;","(c)")
31
+ preparsed.gsub!("<br />\n","<br />")
32
+ preparsed.gsub!(/[ \t]+<br \/>/,"<br />")
33
+ preparsed.gsub!("\t"," ")
34
+ if preparsed['<hr />']
35
+ # find longest dash-line in source
36
+ dashline = File.read(fname).scan(/-+/).sort_by{ |x| -x.length }.first
37
+ preparsed.gsub!("</p>\n<hr />\n<p>","<br /><br />#{dashline}<br /><br />");
38
+ end
39
+
40
+ # preparsed.gsub!(/^<p>/,"");
41
+ # preparsed.gsub!(/<\/p>$/,"");
42
+ preparsed.gsub!("</pre>\n<ol>","</pre><br /><br /><ol>")
43
+ preparsed.gsub!(/<\/p>\s+<p>/,"<br /><br />")
44
+ preparsed.gsub!("</p>\n","<br /><br />")
45
+ preparsed.gsub!("<p>","<br /><br />")
46
+ preparsed.gsub!(/[\r\n]+ */," ")
47
+ preparsed.gsub!(/[ \t]{2,}/," ")
48
+
49
+ preparsed.gsub!("<del>","-")
50
+ preparsed.gsub!("</del>","-")
51
+ preparsed.gsub!(/<br \/>[ ]+/,"<br />")
52
+ preparsed.gsub!(/(<br \/>){2,}/,"<br /><br />")
53
+ # preparsed.gsub!("<br /><ol>","<ol>")
54
+ # preparsed.gsub!("<br /><ul>","<ul>")
55
+ # preparsed.gsub!("<br /><br /><ul>","<br /><ul>")
56
+
57
+ parsed = parse_file(fname)
58
+
59
+ # the old parser does not parse raw-text URLs
60
+ #parsed.gsub!(%r'<a href="([^<>"]+)">([^<>"]+)</a>',"\\1")
61
+
62
+ t1 = parsed
63
+ t2 = preparsed
64
+
65
+ [t1,t2].each do |t|
66
+ t.downcase!
67
+ t.gsub!(/(\s*<br \/>\s*)+/,' ')
68
+ t.gsub!(/\n\s*/,"\n")
69
+ # t.gsub!(/>[ \t]+</,"><")
70
+ t.gsub!(/>[ \t]+/,">")
71
+ t.gsub!(/[ \t]+</,"<")
72
+ t.gsub!(/[\r\n \t]+/," ")
73
+ t.strip!
74
+ end
75
+
76
+ if t1 != t2
77
+ # File.open("last-parsed.tmp","w"){ |f| f << parsed }
78
+ # File.open("last-preparsed.tmp","w"){ |f| f << preparsed }
79
+ pos = 0
80
+ pos += 1 while t1[0..pos] == t2[0..pos]
81
+ pos -= 5
82
+ pos = 0 if pos<0
83
+ t1[pos..-1].should == t2[pos..-1]
84
+ end
85
+ t1.should == t2
86
+ end
87
+ $n ||= 0
88
+ $n += 1
89
+ # break if $n == 1900
90
+ end
91
+ end
92
+
93
+ ###############################################################################
94
+ ###############################################################################
95
+ ###############################################################################
96
+
97
+ def parse_file fname
98
+ r = `cat #{fname} | ./parser`
99
+ r.strip
100
+ end
101
+ end
@@ -0,0 +1,549 @@
1
+ require 'breakout_parser'
2
+
3
+ describe 'BreakoutParser' do
4
+ def self.hex_string s
5
+ s.each_byte.to_a.map{ |c| "%02x" % c }.join
6
+ end
7
+ def hex_string s; self.class.hex_string(s); end
8
+
9
+ it 'converts \n to <br />' do
10
+ parse("aaa\nbbb").should match(%r"aaa ?<br /> ?bbb")
11
+ end
12
+
13
+ it "parses 1M file #1" do
14
+ s = 'a' * 1024 * 1024
15
+ parse(s).size.should == s.size
16
+ end
17
+
18
+ it "parses 1M file #2" do
19
+ s = 'a' + (' ' * 1024 * 1024) + 'b'
20
+ parse(s).should == 'a b'
21
+ end
22
+
23
+ it "parses 1M file #3" do
24
+ s = 'a ' * 1024 * 512
25
+ parse(s).size.should == s.strip.size
26
+ end
27
+
28
+ it "strips tailing spaces and newlines" do
29
+ parse("aaa ").should == "aaa"
30
+ parse("aaa\t\t\t\t\t\t").should == "aaa"
31
+ parse("aaa\r\r\r\r\r").should == "aaa"
32
+ parse("aaa\n\n\n\n\n").should == "aaa"
33
+ parse("aaa\r\n\r\n\r\n\r\n").should == "aaa"
34
+ parse("aaa\r\n\t \t \n \r \n \t \t\n\r ").should == "aaa"
35
+ end
36
+
37
+ it "strips leading spaces and newlines" do
38
+ parse(" aaa").should == "aaa"
39
+ parse("\t\t\t\t\t\taaa").should == "aaa"
40
+ parse("\r\r\r\r\raaa").should == "aaa"
41
+ parse("\n\n\n\n\naaa").should == "aaa"
42
+ parse("\r\n\r\n\r\n\r\naaa").should == "aaa"
43
+ parse("\r\n\t \t \n \r \n \t \t\n\r aaa").should == "aaa"
44
+ end
45
+
46
+ it "converts two or more \\n to single empty line" do
47
+ parse("aaa\n\nbbb").should == "aaa<br /><br />bbb"
48
+ parse("aaa\n \nbbb").should == "aaa<br /><br />bbb"
49
+ parse("aaa\n\n\nbbb").should == "aaa<br /><br />bbb"
50
+ parse("aaa\n \n \nbbb").should == "aaa<br /><br />bbb"
51
+ parse("aaa\r\n \r\n \r\nbbb").should == "aaa<br /><br />bbb"
52
+ parse("aaa\n \n\n \nbbb").should == "aaa<br /><br />bbb"
53
+ parse("aaa\n \n\n\n \nbbb").should == "aaa<br /><br />bbb"
54
+ parse("aaa\n\n\n\n\n\n\nbbb").should == "aaa<br /><br />bbb"
55
+ parse("aaa\r\n\r\n\r\nbbb").should == "aaa<br /><br />bbb"
56
+ end
57
+
58
+ ###############################################################################
59
+
60
+ describe "*bold*" do
61
+ it "only" do
62
+ parse("*bold*").should == '<strong>bold</strong>'
63
+ end
64
+ it "at beginning" do
65
+ parse("*bold*\nxxx").should == '<strong>bold</strong><br />xxx'
66
+ end
67
+ it "in the middle of text" do
68
+ parse("xxx *bold* yyy").should == 'xxx <strong>bold</strong> yyy'
69
+ end
70
+ it "parses *multiline\\nbold*" do
71
+ parse("*multiline\nbold*").should == "<strong>multiline<br />bold</strong>"
72
+ end
73
+ it "skips lone star inside bold block" do
74
+ parse("*aaa * bbb*").should == '<strong>aaa * bbb</strong>'
75
+ end
76
+ it "skips lone star" do
77
+ parse("aaa * bbb").should == 'aaa * bbb'
78
+ end
79
+ it "w/o closing tag" do
80
+ parse("*bold").should == '<strong>bold</strong>'
81
+ end
82
+ it "nesting1 w/o closing tags" do
83
+ parse("*bold1 *bold2").should == '<strong>bold1 <strong>bold2</strong></strong>'
84
+ end
85
+ it "nesting2 w/o closing tags" do
86
+ parse("*bold1 *bold2").should == '<strong>bold1 <strong>bold2</strong></strong>'
87
+ end
88
+
89
+ it "not parses '*.*'" do
90
+ parse("*.*").should == "*.*"
91
+ parse(" *.* ").should == "*.*"
92
+ parse("aaa *.* bbb").should == "aaa *.* bbb"
93
+ end
94
+
95
+ it "not parses '*.something'" do
96
+ parse("*.exe").should == "*.exe"
97
+ parse(" *.exe ").should == "*.exe"
98
+ parse("aaa *.exe bbb").should == "aaa *.exe bbb"
99
+ end
100
+
101
+ end
102
+
103
+ ###############################################################################
104
+
105
+ describe "_italic_" do
106
+ it "only" do
107
+ parse("_italic_").should == '<em>italic</em>'
108
+ end
109
+ it "at beginning" do
110
+ parse("_italic_\nxxx").should == '<em>italic</em><br />xxx'
111
+ end
112
+ it "in the middle of text" do
113
+ parse("xxx _italic_ yyy").should == 'xxx <em>italic</em> yyy'
114
+ end
115
+ it "parses _multiline\\nitalic_" do
116
+ parse("_multiline\nitalic_").should == "<em>multiline<br />italic</em>"
117
+ end
118
+ it "skips lone underscore inside italic block" do
119
+ parse("_aaa _ bbb_").should == '<em>aaa _ bbb</em>'
120
+ end
121
+ it "skips lone underscore" do
122
+ parse("aaa _ bbb").should == 'aaa _ bbb'
123
+ end
124
+ it "w/o closing tag" do
125
+ parse("_italic").should == '<em>italic</em>'
126
+ end
127
+ it "nesting1 w/o closing tags" do
128
+ parse("_italic1 _italic2").should == '<em>italic1 <em>italic2</em></em>'
129
+ end
130
+ it "nesting2 w/o closing tags" do
131
+ parse("_italic1 _italic2").should == '<em>italic1 <em>italic2</em></em>'
132
+ end
133
+ end
134
+
135
+ ###############################################################################
136
+
137
+ describe "combinations" do
138
+ it "bold in italic" do
139
+ s = "_aaa *bbb* ccc_"
140
+ parse(s).should == "<em>aaa <strong>bbb</strong> ccc</em>"
141
+ end
142
+ it "bold in italic - no closing1" do
143
+ s = "_aaa *bbb* ccc"
144
+ parse(s).should == "<em>aaa <strong>bbb</strong> ccc</em>"
145
+ end
146
+ it "bold in italic - no closing2" do
147
+ s = "_aaa *bbb ccc"
148
+ parse(s).should == "<em>aaa <strong>bbb ccc</strong></em>"
149
+ end
150
+
151
+ it "italic in bold" do
152
+ s = "*aaa _bbb_ ccc*"
153
+ parse(s).should == "<strong>aaa <em>bbb</em> ccc</strong>"
154
+ end
155
+ it "italic in bold - no closing1" do
156
+ s = "*aaa _bbb_ ccc"
157
+ parse(s).should == "<strong>aaa <em>bbb</em> ccc</strong>"
158
+ end
159
+ it "italic in bold - no closing2" do
160
+ s = "*aaa _bbb ccc"
161
+ parse(s).should == "<strong>aaa <em>bbb ccc</em></strong>"
162
+ end
163
+
164
+ {'ul' => '*', 'ol' => '#'}.each do |l,c|
165
+ it "raw text link inside #{l.upcase}> #1" do
166
+ s = "#{c} aaa http://www.ru"
167
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
168
+ end
169
+ it "raw text link inside #{l.upcase}> #2" do
170
+ s = "#{c} aaa http://www.ru\n#{c} bbb"
171
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li><li>bbb</li></#{l}>"
172
+ end
173
+ it "raw text link inside #{l.upcase}> #3" do
174
+ s = "#{c} http://www.ru"
175
+ parse(s).should == "<#{l}><li><a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
176
+ end
177
+ it "raw text link inside #{l.upcase}> #4" do
178
+ s = "#{c} aaa http://www.ru bbb"
179
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> bbb</li></#{l}>"
180
+ end
181
+ it "two links inside #{l.upcase}>" do
182
+ s = "#{c} aaa http://www.ru http://ya.ru bbb"
183
+ parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> <a href=\"http://ya.ru\">http://ya.ru</a> bbb</li></#{l}>"
184
+ end
185
+ end
186
+ end
187
+
188
+ ###############################################################################
189
+
190
+ describe "unnumbered list" do
191
+ it "should work" do
192
+ parse("* a\n* b\n* c").should match(
193
+ %r"<ul><li>a</li><li>b</li><li>c</li></ul>"
194
+ )
195
+ end
196
+ it "two lists" do
197
+ s = "* a\n* b\n* c"
198
+ s = s + "\nxxx\n" + s
199
+ r = "<ul><li>a</li><li>b</li><li>c</li></ul>"
200
+ parse(s).should == "#{r}xxx<br />#{r}"
201
+ end
202
+ it "in middle of text when begins with space" do
203
+ parse("hello\n * a\n * b\n * c\nworld").should ==
204
+ "hello<br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
205
+ end
206
+ it "in middle of text" do
207
+ parse("hello\n* a\n* b\n* c\nworld").should ==
208
+ "hello<br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
209
+ end
210
+ it "after blank line" do
211
+ parse("hello\n\n * a\n * b\n * c\nworld").should ==
212
+ "hello<br /><br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
213
+ end
214
+ end
215
+
216
+ ###############################################################################
217
+
218
+ describe "numbered list" do
219
+ it "should work" do
220
+ parse("# a\n# b\n# c").should match(
221
+ %r"<ol><li>a</li><li>b</li><li>c</li></ol>"
222
+ )
223
+ end
224
+ it "two lists" do
225
+ s = "# a\n# b\n# c"
226
+ s = s + "\nxxx\n" + s
227
+ r = "<ol><li>a</li><li>b</li><li>c</li></ol>"
228
+ parse(s).should == "#{r}xxx<br />#{r}"
229
+ end
230
+ it "in middle of text when begins with space" do
231
+ parse("hello\n # a\n # b\n # c\nworld").should ==
232
+ "hello<br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
233
+ end
234
+ it "in middle of text" do
235
+ parse("hello\n# a\n# b\n# c\nworld").should ==
236
+ "hello<br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
237
+ end
238
+ it "after blank line" do
239
+ parse("hello\n\n # a\n # b\n # c\nworld").should ==
240
+ "hello<br /><br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
241
+ end
242
+ end
243
+
244
+ ###############################################################################
245
+
246
+ 1.upto(5) do |lvl|
247
+ describe "H#{lvl}" do
248
+ it "at the beginning" do
249
+ parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
250
+ end
251
+ it "after 1 line of text" do
252
+ parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
253
+ end
254
+ it "after 2 lines of text" do
255
+ parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
256
+ end
257
+ it "in middle of other words" do
258
+ parse("abcd defgh h#{lvl}. xxx yyy").should == "abcd defgh h#{lvl}. xxx yyy"
259
+ end
260
+ it "in middle of other lines" do
261
+ parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
262
+ end
263
+
264
+ it "converts spaces to underscores in id" do
265
+ parse("h#{lvl}. xxx yyy z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx yyy z</h#{lvl}>"
266
+ end
267
+ it "keeps underscores in id" do
268
+ parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
269
+ end
270
+ it "keeps dashes in id" do
271
+ parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
272
+ end
273
+ it "keeps dots in id" do
274
+ parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
275
+ end
276
+
277
+ %w'Ъ ъ : ; , привет" \' ! < >'.each do |c|
278
+ it "converts id to hex if it contains \"#{c}\"" do
279
+ idhex = hex_string("xxx#{c}yyy")
280
+ parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
281
+ end
282
+ end
283
+
284
+ it "skips excess spaces" do
285
+ parse("h#{lvl}. \t xxx \t ").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
286
+ end
287
+
288
+ it "thinks that \\r is EOL" do
289
+ parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
290
+ parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
291
+
292
+ parse("h#{lvl}. xxx\r yyy").sub(' yyy','yyy').should ==
293
+ "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
294
+ end
295
+ end
296
+ end
297
+
298
+ ###############################################################################
299
+
300
+ describe "raw text links" do
301
+ it "at the beginning" do
302
+ parse("http://www.ru").should == "<a href=\"http://www.ru\">http://www.ru</a>"
303
+ end
304
+ it "in middle of other words" do
305
+ parse("aaa bbb ccc http://www.ru ddd eee fff").should ==
306
+ "aaa bbb ccc <a href=\"http://www.ru\">http://www.ru</a> ddd eee fff"
307
+ end
308
+ it "in new line" do
309
+ parse("aaa bbb ccc\nhttp://www.ru\nddd eee fff").should match(
310
+ %r"aaa bbb ccc ?<br /> ?<a href=\"http://www.ru\">http://www.ru</a> ?<br /> ?ddd eee fff"
311
+ )
312
+ end
313
+ it "escapes '&' in link _text_" do
314
+ parse("http://www.ru/?a=1&b=2").should == "<a href=\"http://www.ru/?a=1&b=2\">http://www.ru/?a=1&amp;b=2</a>"
315
+ end
316
+
317
+ it "parses https://" do
318
+ parse("https://www.ru").should == "<a href=\"https://www.ru\">https://www.ru</a>"
319
+ end
320
+
321
+ %w', .'.each do |c|
322
+ it "stops parsing on \"#{c} \"" do
323
+ parse("http://www.ru#{c}").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
324
+ parse(" http://www.ru#{c} ").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
325
+ parse(" http://www.ru#{c} hello!").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
326
+ parse("xxx http://www.ru#{c} hello!").should == "xxx <a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
327
+ parse(" http://www.ru/#{c} hello!").should == "<a href=\"http://www.ru/\">http://www.ru/</a>#{c} hello!"
328
+ parse(" http://aaa.com#{c} http://bbb.com").should ==
329
+ "<a href=\"http://aaa.com\">http://aaa.com</a>#{c} <a href=\"http://bbb.com\">http://bbb.com</a>"
330
+ end
331
+ end
332
+ end
333
+
334
+ ###############################################################################
335
+
336
+ describe "#ticketNum ticket links" do
337
+ it "at the beginning" do
338
+ parse("#1234").should == '<a href="/spaces/test_space/tickets/1234">#1234</a>'
339
+ end
340
+ it "in middle of other words" do
341
+ parse("aaa bbb ccc #3476 ddd eee fff").should ==
342
+ 'aaa bbb ccc <a href="/spaces/test_space/tickets/3476">#3476</a> ddd eee fff'
343
+ end
344
+ it "in new line" do
345
+ parse("aaa bbb ccc\n#1234\nddd eee fff").should match(
346
+ %r|aaa bbb ccc ?<br /> ?<a href="/spaces/test_space/tickets/1234">#1234</a> ?<br /> ?ddd eee fff|
347
+ )
348
+ end
349
+ it "ignores non-digits" do
350
+ parse("#1234d").should == '#1234d'
351
+ parse("#xxx").should == '#xxx'
352
+ end
353
+ end
354
+
355
+ ###############################################################################
356
+
357
+ describe "<pre><code>..</code></pre>" do
358
+ it "works" do
359
+ s = <<-EOF
360
+ for ( n = 0; n < max_size && \
361
+ (c = getc( yyin )) != EOF && c != '\\n'; ++n ) \
362
+ buf[n] = (char) c; \
363
+
364
+ EOF
365
+
366
+ parse("<pre><code>#{s.strip}</code></pre>").should ==
367
+ "<pre><code>#{h(s.strip)}</code></pre>"
368
+
369
+ s = <<-EOF
370
+ while ( 1 < 2 ) do
371
+ puts "<b>12345\\t54321</b>"
372
+ // *bold* comment
373
+ // _italic_ comment
374
+ end
375
+ ---
376
+ * aaa
377
+ * bbb
378
+ * ccc
379
+
380
+ EOF
381
+ parse("<pre><code>#{s.strip}</code></pre>").should ==
382
+ "<pre><code>#{h(s.strip)}</code></pre>"
383
+ end
384
+ it "not parses *bold*" do
385
+ s = "<pre><code> *bold*</code></pre>"
386
+ parse(s).should == s
387
+ end
388
+ it "not parses _italic_" do
389
+ s = "<pre><code> _italic_</code></pre>"
390
+ parse(s).should == s
391
+ end
392
+ it "not parses UL lists" do
393
+ s = "<pre><code>\n * l1\n * l2\n * l3</code></pre>"
394
+ parse(s).should == s.sub("<code>\n","<code>")
395
+ end
396
+ it "not parses OL lists" do
397
+ s = "<pre><code>\n # l1\n # l2\n # l3</code></pre>"
398
+ parse(s).should == s.sub("<code>\n","<code>")
399
+ end
400
+ it "not parses H1..H5" do
401
+ 1.upto(5) do |i|
402
+ s = "<pre><code>\nh#{i}. zzzzzzz\n</code></pre>"
403
+ parse(s).should == "<pre><code>h#{i}. zzzzzzz</code></pre>"
404
+ end
405
+ end
406
+ it "not parses raw text links" do
407
+ s = "<pre><code>xxx http://www.ru yyy</code></pre>"
408
+ parse(s).should == s
409
+ s = "<pre><code>http://www.ru</code></pre>"
410
+ parse(s).should == s
411
+ end
412
+ it "keeps newlines" do
413
+ s = "<pre><code>aaa\nbbb</code></pre>"
414
+ parse(s).should == s
415
+ s = "<pre><code>aaa\n\nbbb\nccc</code></pre>"
416
+ parse(s).should == s
417
+ end
418
+
419
+ it "with no spaces between <pre> and <code>" do
420
+ s = "<pre><code>aaa</code></pre>"
421
+ parse(s).should == s
422
+ end
423
+
424
+ it "with spaces between <pre> and <code>" do
425
+ s = "<pre> <code>aaa</code> </pre>"
426
+ parse(s).should == s.tr(' ','')
427
+ end
428
+ it "with spaces between <pre> and <code> and inside" do
429
+ s = "<pre> <code> aaa bbb </code> </pre>"
430
+ parse(s).should == "<pre><code> aaa bbb</code></pre>"
431
+ end
432
+
433
+ it "w/o closing tags" do
434
+ s = "<pre><code>aaa"
435
+ parse(s).should match(%r"<pre><code>aaa\n?</code></pre>")
436
+ end
437
+
438
+ it "in middle of text" do
439
+ s = "xxx <pre><code>yyyy</code></pre> jjj"
440
+ parse(s).should == s
441
+ end
442
+
443
+ it "with 2 instances" do
444
+ s = "xxx <pre><code>yyyy</code></pre> <jjj> <pre><code>asdkjaslkd</code></pre> END"
445
+ parse(s).should == s.sub('<jjj>','&lt;jjj&gt;')
446
+ end
447
+
448
+ it "works with unicode" do
449
+ s = "привет <pre><code> жжж </code></pre> пока!"
450
+ parse(s).should match(%r|привет ?<pre><code> жжж</code></pre> ?пока!|)
451
+
452
+ s = 'абвгдеёжзийклмнопрстуфхцчшщьыъэюя'
453
+ parse(s).should == s
454
+
455
+ s = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯ'
456
+ parse(s).should == s
457
+
458
+ s = '☸☹☺☻☼☽☾☿'
459
+ parse(s).should == s
460
+ end
461
+
462
+ it "should escape lone closing tags" do
463
+ s = "</code></pre>"
464
+ parse(s).should == h(s)
465
+ end
466
+
467
+ it "should skip newlines and spaces at end" do
468
+ s = "<pre><code> aaa bbb ccc \n\n\n \t\n\n\n\r\n\r\n \t </code></pre>"
469
+ parse(s).should == "<pre><code> aaa bbb ccc</code></pre>"
470
+ end
471
+
472
+ it "escapes html chars" do
473
+ HTML_ESCAPE.each do |k,v|
474
+ parse("<pre><code>#{k}</code></pre>").should == "<pre><code>#{v}</code></pre>"
475
+ end
476
+ end
477
+ end
478
+
479
+ ###############################################################################
480
+
481
+ describe "Assembla Links" do
482
+ a = {}
483
+ a["wiki:Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
484
+ a["Name"] = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
485
+ a["Name#Ref"] = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#Ref">Name#Ref</a>'
486
+ a["#Ref"] = '<a href="#Ref" title="#Ref" class="wiki_link">#Ref</a>'
487
+ a["#привет"] = %Q|<a href="##{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
488
+ a["#with spc"] = %Q|<a href="#with__spc" title="#with spc" class="wiki_link">#with spc</a>|
489
+ a["#with__usc"] = %Q|<a href="#with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
490
+ a["#with--dsh"] = %Q|<a href="#with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
491
+ a["#with!xclm"] = %Q|<a href="##{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
492
+ a["#with&amp"] = %Q|<a href="##{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
493
+
494
+ a["ticket:234"] = '<a href="/spaces/test_space/tickets/234">#234</a>'
495
+ a["revision:1f4bdab77be696efd"] =
496
+ '<a href="http://code.assembla.com/test_space/git/changesets/1f4bdab77be696efd">revision:1f4bdab77be696efd</a>'
497
+ a["revision:12345"] =
498
+ '<a href="http://code.assembla.com/test_space/svn/changesets/12345">revision:12345</a>'
499
+ a["r:2345"] = '<a href="http://code.assembla.com/test_space/svn/changesets/2345">revision:2345</a>'
500
+ a["r:2345ef"] = '<a href="http://code.assembla.com/test_space/git/changesets/2345ef">revision:2345ef</a>'
501
+
502
+ a["url:http://www.ru"] = '<a href="http://www.ru">http://www.ru</a>'
503
+ a["url:https://www.ru"] = '<a href="https://www.ru">https://www.ru</a>'
504
+ a["url:www.ru"] = '<a href="http://www.ru">http://www.ru</a>'
505
+ a["url:www.ru/?a=1&b=2"] = '<a href="http://www.ru/?a=1&b=2">http://www.ru/?a=1&amp;b=2</a>'
506
+ a["url:ftp://www.ru"] = '<a href="ftp://www.ru">ftp://www.ru</a>'
507
+ a["url:/spaces/x2"] = '<a href="/spaces/x2">/spaces/x2</a>'
508
+
509
+ a.each do |k,v|
510
+ it "parses [[#{k}]]" do
511
+ parse("[[#{k}]]").should == v
512
+ end
513
+ it "parses [[#{k}|привет тест]]" do
514
+ parse("[[#{k}|привет тест]]").should == v.sub(/>.*</,">привет тест<")
515
+ end
516
+ it "parses [[#{k}|test & here]]" do
517
+ parse("[[#{k}|test & here]]").should == v.sub(/>.*</,">test &amp; here<")
518
+ end
519
+ end
520
+
521
+ it "keeps unknown link types" do
522
+ s = "[[zzz:xxx]]"
523
+ parse(s).should == s
524
+ s = "[[abcd:1234]]"
525
+ parse(s).should == s
526
+ s = "[[abcd::1234]] [[abcd:1234]] [[uri:www.ru]]"
527
+ parse(s).should == s
528
+ end
529
+
530
+ it "links to ExistingFile.txt"
531
+ it "links to NotExistingFile.txt"
532
+ it "links to ExistingImage.png"
533
+ it "links to NotExistingImage.png"
534
+ end
535
+
536
+ ###############################################################################
537
+ ###############################################################################
538
+ ###############################################################################
539
+
540
+ HTML_ESCAPE = { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' }
541
+
542
+ def h s
543
+ s.to_s.gsub(/[&"><]/) { |special| HTML_ESCAPE[special] }
544
+ end
545
+
546
+ def parse s
547
+ BreakoutParser.parse(s).strip
548
+ end
549
+ end
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: breakout_parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: x86-mswin32
6
+ authors:
7
+ - Andrey "Zed" Zaikin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-19 00:00:00 +05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: BreakoutParser
26
+ email: zed.0xff@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ files:
34
+ - .gitignore
35
+ - LICENSE
36
+ - Rakefile
37
+ - VERSION
38
+ - lib/breakout_parser.rb
39
+ - lib/breakout_parser/win32-ruby1.8/breakout_parser.so
40
+ - spec/parser_examples_spec.rb
41
+ - spec/parser_spec.rb
42
+ has_rdoc: true
43
+ homepage: http://assembla.com
44
+ licenses: []
45
+
46
+ post_install_message:
47
+ rdoc_options:
48
+ - --charset=UTF-8
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: "0"
62
+ version:
63
+ requirements: []
64
+
65
+ rubyforge_project:
66
+ rubygems_version: 1.3.5
67
+ signing_key:
68
+ specification_version: 3
69
+ summary: BreakoutParser
70
+ test_files:
71
+ - spec/parser_spec.rb