RubyGems - breakout_parser - Versions diffs - 0.0.0-x86-mswin32 - Mend

breakout_parser 0.0.0-x86-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

data/.gitignore +16 -0
data/LICENSE +39 -0
data/Rakefile +99 -0
data/VERSION +1 -0
data/lib/breakout_parser/win32-ruby1.8/breakout_parser.so +0 -0
data/lib/breakout_parser.rb +6 -0
data/spec/parser_examples_spec.rb +101 -0
data/spec/parser_spec.rb +549 -0
metadata +71 -0

data/.gitignore ADDED Viewed

@@ -0,0 +1,16 @@
+wiki
+pkg
+lex.yy.c
+*.tab.c
+*.tab.h
+*.tmp
+ext/breakout_parser/*.so
+*.o
+*.def
+*.exp
+*.lib
+*.pdb
+*.obj
+*.manifest
+*.gemspec
+Makefile

data/LICENSE ADDED Viewed

@@ -0,0 +1,39 @@
+Copyright (c) 2010 Assembla, Inc.
+SINGLETON DEVELOPMENT LICENSE
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* This is a license for research and development use only, and does not
+include a license for commercial use. If you desire a license for commercial
+use, you must contact the copyright owner for a commercial use license,
+which would supersede the terms of this license. "Commercial Use" means any
+use (internal or external), copying, sublicensing or distribution
+(internal or external), directly or indirectly, for commercial or strategic
+gain or advantage, including use in internal operations or in providing
+products or services to any third party. Research and development for
+eventual commercial use is not "Commercial Use" so long as a commercial
+use license is obtained prior to commercial use. Redistribution to others
+for their research and development use is not "Commercial Use".
+* Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution. Redistribution in
+binary form does not require redistribution of source code.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGE.

data/Rakefile ADDED Viewed

@@ -0,0 +1,99 @@
+require 'rubygems'
+require 'rake'
+begin
+  require 'jeweler'
+  def gen_tasks # Configures the gem specification through Jeweler::Tasks, then instantiates Jeweler::GemcutterTasks.
+    Jeweler::Tasks.new do |gem|
+      # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
+      gem.name = "breakout_parser"
+      gem.platform = ENV['PLATFORM'] if ENV['PLATFORM'] # optional platform override taken from the PLATFORM environment variable
+      gem.summary = %Q{BreakoutParser}
+      gem.description = %Q{BreakoutParser}
+      gem.email = "zed.0xff@gmail.com"
+      gem.homepage = "http://assembla.com"
+      gem.authors = ["Andrey \"Zed\" Zaikin"]
+      gem.add_development_dependency "rspec", ">= 1.2.9"
+      gem.test_files.delete 'spec/parser_examples_spec.rb' # NOTE(review): presumably excluded because that spec reads examples/, which is removed from the gem below - confirm
+      gem.files.delete_if{ |f| f[0..8] == 'examples/' } # drop every path that starts with "examples/"
+      gem.files.delete_if{ |f| f[0..4] == 'misc/' } # drop every path that starts with "misc/"
+      if gem.platform == 'ruby' # platform 'ruby': add everything under ext/, minus the Makefile and any .so files
+        gem.files.include 'ext/**/*'
+        gem.files.delete  "ext/breakout_parser/Makefile"
+        gem.files.delete_if{ |f| f[-3..-1] == '.so' }
+      else # any other platform: add the .so files found under lib/
+        gem.files.include 'lib/**/*.so'
+        gem.extensions = '.' # HACK: package no extensions
+      end
+    end
+    Jeweler::GemcutterTasks.new
+  end
+  gen_tasks
+rescue LoadError
+  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+end
+require 'spec/rake/spectask'
+Spec::Rake::SpecTask.new(:spec) do |spec| # :spec task over every *_spec.rb file under spec/
+  spec.libs << 'lib' << 'spec' # adds lib and spec to the task's libs list
+  spec.spec_files = FileList['spec/**/*_spec.rb']
+end
+Spec::Rake::SpecTask.new(:rcov) do |spec| # :rcov task: same spec file pattern as :spec, with the rcov flag switched on
+  spec.libs << 'lib' << 'spec'
+  spec.pattern = 'spec/**/*_spec.rb'
+  spec.rcov = true
+end
+task :spec => :check_dependencies
+task :default => :spec
+require 'rake/rdoctask'
+Rake::RDocTask.new do |rdoc|
+  version = File.exist?('VERSION') ? File.read('VERSION') : ""
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "breakout_parser #{version}"
+#  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end
+######################
+namespace :build do
+  desc "Build all gem variants"
+  task :all do # runs the :build task three times: gemspec as configured, then x86-mingw32, then x86-mswin32
+    Rake::Task[ :build ].execute # first pass uses the gemspec exactly as configured
+    @gems_to_push = [] # collected gem paths; read later by the gemcutter:release:all task
+    @gems_to_push << Rake.application.jeweler.gemspec_helper.gem_path
+    gem = Rake.application.jeweler_tasks.gemspec # the same spec object is mutated in place for the following passes
+    gem.files.delete_if{ |f| f[0..3] == 'ext/' } # remaining passes: remove every path under ext/ ...
+    gem.extensions = []
+    gem.files.include 'lib/**/*.so' # ... and add the .so files found under lib/
+    gem.original_platform = nil # NOTE(review): presumably reset so the platform assigned next takes effect - confirm against RubyGems
+    gem.platform = 'x86-mingw32'
+    Rake::Task[ :build ].execute
+    @gems_to_push << Rake.application.jeweler.gemspec_helper.gem_path
+    gem.original_platform = nil
+    gem.platform = 'x86-mswin32'
+    Rake::Task[ :build ].execute
+    @gems_to_push << Rake.application.jeweler.gemspec_helper.gem_path
+  end
+end
+namespace 'gemcutter:release' do
+  desc "Release all gem variants"
+  task :all => 'build:all' do # depends on build:all, which fills @gems_to_push
+    @gems_to_push.each do |fname|
+      command = "gem push #{fname}" # one `gem push` per collected gem path
+      puts "Executing #{command.inspect}:" # echo the command before running it
+      sh command
+    end
+  end
+end

data/VERSION ADDED Viewed

@@ -0,0 +1 @@
+0.0.0

data/lib/breakout_parser/win32-ruby1.8/breakout_parser.so ADDED Viewed

Binary file

data/lib/breakout_parser.rb ADDED Viewed

@@ -0,0 +1,6 @@
+if RUBY_PLATFORM =~/(mswin|mingw)/i # platforms whose name contains mswin or mingw load a per-Ruby-version binary
+  # Fat binary gems, you make the Rockin' world go round
+  require "breakout_parser/win32-ruby#{RUBY_VERSION.sub(/\.\d+$/, '')}/breakout_parser" # drops the last ".N" of RUBY_VERSION, e.g. "1.8.7" -> win32-ruby1.8
+else # every other platform requires breakout_parser/breakout_parser
+  require 'breakout_parser/breakout_parser'
+end

data/spec/parser_examples_spec.rb ADDED Viewed

@@ -0,0 +1,101 @@
+describe 'BreakoutParser' do
+  describe "bad examples" do
+    Dir["examples/orig/*.bad"].sort.each do |fname|
+      it "should not die on #{fname} " do
+        data = File.read(fname)
+        parse_file(fname).size.should >= File.read(fname).strip.gsub(/\s+/,' ').size
+      end
+    end
+  end
+  describe "pending examples" do
+    Dir["examples/orig/*.pending"].sort.each do |fname|
+      it "should parse #{fname} "
+    end
+  end
+  describe "preparsed examples" do
+    Dir["examples/orig/*.txt"].sort.each do |fname|
+      bname = File.basename(fname)
+      it "should parse #{fname} " do # compares the ./parser output with the stored reference HTML after normalizing both
+        preparsed = File.read("examples/parsed/#{bname}") # reference HTML stored under examples/parsed/ with the same basename
+        preparsed = preparsed[3..-1] if preparsed[0..2] == '<p>' # drop one leading <p>
+        preparsed = preparsed[0..-5] if preparsed[-4..-1] == '</p>' # drop one trailing </p>
+        preparsed.gsub!("&#8211;","-") # this and the next six lines: replace numeric HTML entities with plain-ASCII stand-ins
+        preparsed.gsub!("&#8212;","--")
+        preparsed.gsub!("&#8216;","'")
+        preparsed.gsub!("&#8217;","'")
+        preparsed.gsub!("&#8230;","...")
+        preparsed.gsub!("&#215;","x")
+        preparsed.gsub!("&#169;","(c)")
+        preparsed.gsub!("<br />\n","<br />")
+        preparsed.gsub!(/[ \t]+<br \/>/,"<br />")
+        preparsed.gsub!("\t"," ")
+        if preparsed['<hr />']
+          # find longest dash-line in source
+          dashline = File.read(fname).scan(/-+/).sort_by{ |x| -x.length }.first
+          preparsed.gsub!("</p>\n<hr />\n<p>","<br /><br />#{dashline}<br /><br />"); # put that dash run where the reference has a horizontal rule between paragraphs
+        end
+#        preparsed.gsub!(/^<p>/,"");
+#        preparsed.gsub!(/<\/p>$/,"");
+        preparsed.gsub!("</pre>\n<ol>","</pre><br /><br /><ol>")
+        preparsed.gsub!(/<\/p>\s+<p>/,"<br /><br />") # paragraph boundaries become a double <br />
+        preparsed.gsub!("</p>\n","<br /><br />")
+        preparsed.gsub!("<p>","<br /><br />")
+        preparsed.gsub!(/[\r\n]+ */," ") # line breaks plus following spaces collapse to one space
+        preparsed.gsub!(/[ \t]{2,}/," ")
+        preparsed.gsub!("<del>","-") # <del>...</del> is rewritten as -...-
+        preparsed.gsub!("</del>","-")
+        preparsed.gsub!(/<br \/>[ ]+/,"<br />")
+        preparsed.gsub!(/(<br \/>){2,}/,"<br /><br />") # cap runs of <br /> at two
+#        preparsed.gsub!("<br /><ol>","<ol>")
+#        preparsed.gsub!("<br /><ul>","<ul>")
+#        preparsed.gsub!("<br /><br /><ul>","<br /><ul>")
+        parsed = parse_file(fname) # actual output of ./parser for the source file
+        # the old parser does not parse raw-text URLs
+        #parsed.gsub!(%r'<a href="([^<>"]+)">([^<>"]+)</a>',"\\1")
+        t1 = parsed
+        t2 = preparsed
+        [t1,t2].each do |t| # normalize both strings in place (t1/t2 alias parsed/preparsed)
+          t.downcase!
+          t.gsub!(/(\s*<br \/>\s*)+/,' ') # any run of <br /> with surrounding whitespace becomes a single space
+          t.gsub!(/\n\s*/,"\n")
+#          t.gsub!(/>[ \t]+</,"><")
+          t.gsub!(/>[ \t]+/,">") # remove spaces/tabs right after a tag ...
+          t.gsub!(/[ \t]+</,"<") # ... and right before one
+          t.gsub!(/[\r\n \t]+/," ")
+          t.strip!
+        end
+        if t1 != t2 # on mismatch, first compare only the tails starting 5 characters before the first difference
+#          File.open("last-parsed.tmp","w"){ |f| f << parsed }
+#          File.open("last-preparsed.tmp","w"){ |f| f << preparsed }
+          pos = 0
+          pos += 1 while t1[0..pos] == t2[0..pos] # advance pos to the first index at which the prefixes differ
+          pos -= 5
+          pos = 0 if pos<0
+          t1[pos..-1].should == t2[pos..-1] # NOTE(review): presumably done to keep the failure output focused on the differing region - confirm
+        end
+        t1.should == t2
+      end
+      $n ||= 0
+      $n +=  1
+#      break if $n == 1900
+    end
+  end
+###############################################################################
+###############################################################################
+###############################################################################
+  def parse_file fname # Pipes the file at fname into the ./parser executable through the shell; returns its stripped stdout.
+    r = `cat #{fname} | ./parser` # NOTE(review): fname is interpolated unquoted - paths with spaces or shell metacharacters will break the command
+    r.strip
+  end
+end

data/spec/parser_spec.rb ADDED Viewed

@@ -0,0 +1,549 @@
+require 'breakout_parser'
+describe 'BreakoutParser' do
+  def self.hex_string s # Returns the bytes of s as lowercase hex, two digits per byte; class-level so describe bodies can call it
+    s.each_byte.to_a.map{ |c| "%02x" % c }.join
+  end
+  def hex_string s; self.class.hex_string(s); end # instance-level wrapper so code inside examples can call hex_string too
+  it 'converts \n to <br />' do
+    parse("aaa\nbbb").should match(%r"aaa ?<br /> ?bbb")
+  end
+  it "parses 1M file #1" do
+    s = 'a' * 1024 * 1024
+    parse(s).size.should == s.size
+  end
+  it "parses 1M file #2" do
+    s = 'a' + (' ' * 1024 * 1024) + 'b'
+    parse(s).should == 'a b'
+  end
+  it "parses 1M file #3" do
+    s = 'a ' * 1024 * 512
+    parse(s).size.should == s.strip.size
+  end
+  it "strips tailing spaces and newlines" do
+    parse("aaa         ").should == "aaa"
+    parse("aaa\t\t\t\t\t\t").should == "aaa"
+    parse("aaa\r\r\r\r\r").should == "aaa"
+    parse("aaa\n\n\n\n\n").should == "aaa"
+    parse("aaa\r\n\r\n\r\n\r\n").should == "aaa"
+    parse("aaa\r\n\t   \t  \n  \r   \n   \t  \t\n\r   ").should == "aaa"
+  end
+  it "strips leading spaces and newlines" do
+    parse("         aaa").should == "aaa"
+    parse("\t\t\t\t\t\taaa").should == "aaa"
+    parse("\r\r\r\r\raaa").should == "aaa"
+    parse("\n\n\n\n\naaa").should == "aaa"
+    parse("\r\n\r\n\r\n\r\naaa").should == "aaa"
+    parse("\r\n\t   \t  \n  \r   \n   \t  \t\n\r   aaa").should == "aaa"
+  end
+  it "converts two or more \\n to single empty line" do
+    parse("aaa\n\nbbb").should == "aaa<br /><br />bbb"
+    parse("aaa\n  \nbbb").should == "aaa<br /><br />bbb"
+    parse("aaa\n\n\nbbb").should == "aaa<br /><br />bbb"
+    parse("aaa\n   \n    \nbbb").should == "aaa<br /><br />bbb"
+    parse("aaa\r\n   \r\n    \r\nbbb").should == "aaa<br /><br />bbb"
+    parse("aaa\n   \n\n    \nbbb").should == "aaa<br /><br />bbb"
+    parse("aaa\n   \n\n\n    \nbbb").should == "aaa<br /><br />bbb"
+    parse("aaa\n\n\n\n\n\n\nbbb").should == "aaa<br /><br />bbb"
+    parse("aaa\r\n\r\n\r\nbbb").should == "aaa<br /><br />bbb"
+  end
+###############################################################################
+  describe "*bold*" do
+    it "only" do
+      parse("*bold*").should == '<strong>bold</strong>'
+    end
+    it "at beginning" do
+      parse("*bold*\nxxx").should == '<strong>bold</strong><br />xxx'
+    end
+    it "in the middle of text" do
+      parse("xxx *bold* yyy").should == 'xxx <strong>bold</strong> yyy'
+    end
+    it "parses *multiline\\nbold*" do
+      parse("*multiline\nbold*").should == "<strong>multiline<br />bold</strong>"
+    end
+    it "skips lone star inside bold block" do
+      parse("*aaa * bbb*").should == '<strong>aaa * bbb</strong>'
+    end
+    it "skips lone star" do
+      parse("aaa * bbb").should == 'aaa * bbb'
+    end
+    it "w/o closing tag" do
+      parse("*bold").should == '<strong>bold</strong>'
+    end
+    it "nesting1 w/o closing tags" do
+      parse("*bold1 *bold2").should == '<strong>bold1 <strong>bold2</strong></strong>'
+    end
+    it "nesting2 w/o closing tags" do
+      parse("*bold1  *bold2").should == '<strong>bold1 <strong>bold2</strong></strong>'
+    end
+    it "not parses '*.*'" do
+      parse("*.*").should == "*.*"
+      parse(" *.* ").should == "*.*"
+      parse("aaa *.* bbb").should == "aaa *.* bbb"
+    end
+    it "not parses '*.something'" do
+      parse("*.exe").should == "*.exe"
+      parse(" *.exe ").should == "*.exe"
+      parse("aaa *.exe bbb").should == "aaa *.exe bbb"
+    end
+  end
+###############################################################################
+  describe "_italic_" do
+    it "only" do
+      parse("_italic_").should == '<em>italic</em>'
+    end
+    it "at beginning" do
+      parse("_italic_\nxxx").should == '<em>italic</em><br />xxx'
+    end
+    it "in the middle of text" do
+      parse("xxx _italic_ yyy").should == 'xxx <em>italic</em> yyy'
+    end
+    it "parses _multiline\\nitalic_" do
+      parse("_multiline\nitalic_").should == "<em>multiline<br />italic</em>"
+    end
+    it "skips lone underscore inside italic block" do
+      parse("_aaa _ bbb_").should == '<em>aaa _ bbb</em>'
+    end
+    it "skips lone underscore" do
+      parse("aaa _ bbb").should == 'aaa _ bbb'
+    end
+    it "w/o closing tag" do
+      parse("_italic").should == '<em>italic</em>'
+    end
+    it "nesting1 w/o closing tags" do
+      parse("_italic1 _italic2").should == '<em>italic1 <em>italic2</em></em>'
+    end
+    it "nesting2 w/o closing tags" do
+      parse("_italic1  _italic2").should == '<em>italic1 <em>italic2</em></em>'
+    end
+  end
+###############################################################################
+  describe "combinations" do
+    it "bold in italic" do
+      s = "_aaa *bbb* ccc_"
+      parse(s).should == "<em>aaa <strong>bbb</strong> ccc</em>"
+    end
+    it "bold in italic - no closing1" do
+      s = "_aaa *bbb* ccc"
+      parse(s).should == "<em>aaa <strong>bbb</strong> ccc</em>"
+    end
+    it "bold in italic - no closing2" do
+      s = "_aaa *bbb ccc"
+      parse(s).should == "<em>aaa <strong>bbb ccc</strong></em>"
+    end
+    it "italic in bold" do
+      s = "*aaa _bbb_ ccc*"
+      parse(s).should == "<strong>aaa <em>bbb</em> ccc</strong>"
+    end
+    it "italic in bold - no closing1" do
+      s = "*aaa _bbb_ ccc"
+      parse(s).should == "<strong>aaa <em>bbb</em> ccc</strong>"
+    end
+    it "italic in bold - no closing2" do
+      s = "*aaa _bbb ccc"
+      parse(s).should == "<strong>aaa <em>bbb ccc</em></strong>"
+    end
+    {'ul' => '*', 'ol' => '#'}.each do |l,c|
+      it "raw text link inside #{l.upcase}> #1" do
+        s = "#{c} aaa http://www.ru"
+        parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
+      end
+      it "raw text link inside #{l.upcase}> #2" do
+        s = "#{c} aaa http://www.ru\n#{c} bbb"
+        parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a></li><li>bbb</li></#{l}>"
+      end
+      it "raw text link inside #{l.upcase}> #3" do
+        s = "#{c} http://www.ru"
+        parse(s).should == "<#{l}><li><a href=\"http://www.ru\">http://www.ru</a></li></#{l}>"
+      end
+      it "raw text link inside #{l.upcase}> #4" do
+        s = "#{c} aaa http://www.ru bbb"
+        parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> bbb</li></#{l}>"
+      end
+      it "two links inside #{l.upcase}>" do
+        s = "#{c} aaa http://www.ru http://ya.ru bbb"
+        parse(s).should == "<#{l}><li>aaa <a href=\"http://www.ru\">http://www.ru</a> <a href=\"http://ya.ru\">http://ya.ru</a> bbb</li></#{l}>"
+      end
+    end
+  end
+###############################################################################
+  describe "unnumbered list" do
+    it "should work" do
+      parse("* a\n* b\n* c").should match(
+        %r"<ul><li>a</li><li>b</li><li>c</li></ul>"
+      )
+    end
+    it "two lists" do
+      s = "* a\n* b\n* c"
+      s = s + "\nxxx\n" + s
+      r = "<ul><li>a</li><li>b</li><li>c</li></ul>"
+      parse(s).should == "#{r}xxx<br />#{r}"
+    end
+    it "in middle of text when begins with space" do
+      parse("hello\n * a\n  * b\n * c\nworld").should ==
+        "hello<br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
+    end
+    it "in middle of text" do
+      parse("hello\n* a\n* b\n* c\nworld").should ==
+        "hello<br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
+    end
+    it "after blank line" do
+      parse("hello\n\n * a\n * b\n * c\nworld").should ==
+        "hello<br /><br /><ul><li>a</li><li>b</li><li>c</li></ul>world"
+    end
+  end
+###############################################################################
+  describe "numbered list" do
+    it "should work" do
+      parse("# a\n# b\n# c").should match(
+        %r"<ol><li>a</li><li>b</li><li>c</li></ol>"
+      )
+    end
+    it "two lists" do
+      s = "# a\n# b\n# c"
+      s = s + "\nxxx\n" + s
+      r = "<ol><li>a</li><li>b</li><li>c</li></ol>"
+      parse(s).should == "#{r}xxx<br />#{r}"
+    end
+    it "in middle of text when begins with space" do
+      parse("hello\n # a\n  # b\n # c\nworld").should ==
+        "hello<br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
+    end
+    it "in middle of text" do
+      parse("hello\n# a\n# b\n# c\nworld").should ==
+        "hello<br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
+    end
+    it "after blank line" do
+      parse("hello\n\n # a\n # b\n # c\nworld").should ==
+        "hello<br /><br /><ol><li>a</li><li>b</li><li>c</li></ol>world"
+    end
+  end
+###############################################################################
+  1.upto(5) do |lvl|
+    describe "H#{lvl}" do
+      it "at the beginning" do
+        parse("h#{lvl}. xxx").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
+      end
+      it "after 1 line of text" do
+        parse("abcd\nh#{lvl}. xxx").should == "abcd<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
+      end
+      it "after 2 lines of text" do
+        parse("abcd\ndefgh\nh#{lvl}. xxx").should == "abcd<br />defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
+      end
+      it "in middle of other words" do
+        parse("abcd defgh h#{lvl}. xxx yyy").should == "abcd defgh h#{lvl}. xxx yyy"
+      end
+      it "in middle of other lines" do
+        parse("abcd defgh\nh#{lvl}. xxx\nyyy").should == "abcd defgh<br /><h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
+      end
+      it "converts spaces to underscores in id" do
+        parse("h#{lvl}. xxx   yyy z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx   yyy z</h#{lvl}>"
+      end
+      it "keeps underscores in id" do
+        parse("h#{lvl}. xxx___yyy_z").should == "<h#{lvl} id=\"xxx___yyy_z\">xxx___yyy_z</h#{lvl}>"
+      end
+      it "keeps dashes in id" do
+        parse("h#{lvl}. xxx---yyy-z").should == "<h#{lvl} id=\"xxx---yyy-z\">xxx---yyy-z</h#{lvl}>"
+      end
+      it "keeps dots in id" do
+        parse("h#{lvl}. xxx...yyy.z").should == "<h#{lvl} id=\"xxx...yyy.z\">xxx...yyy.z</h#{lvl}>"
+      end
+      %w'Ъ ъ : ; , привет" \' ! < >'.each do |c|
+        it "converts id to hex if it contains \"#{c}\"" do
+          idhex = hex_string("xxx#{c}yyy")
+          parse("h#{lvl}. xxx#{c}yyy").should == "<h#{lvl} id=\"#{idhex}\">xxx#{h(c)}yyy</h#{lvl}>"
+        end
+      end
+      it "skips excess spaces" do
+        parse("h#{lvl}.  \t  xxx   \t ").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
+      end
+      it "thinks that \\r is EOL" do
+        parse("h#{lvl}. xxx\ryyy").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
+        parse("h#{lvl}. xxx\r").should == "<h#{lvl} id=\"xxx\">xxx</h#{lvl}>"
+        parse("h#{lvl}. xxx\r yyy").sub(' yyy','yyy').should ==
+          "<h#{lvl} id=\"xxx\">xxx</h#{lvl}><br />yyy"
+      end
+    end
+  end
+###############################################################################
+  describe "raw text links" do
+    it "at the beginning" do
+      parse("http://www.ru").should == "<a href=\"http://www.ru\">http://www.ru</a>"
+    end
+    it "in middle of other words" do
+      parse("aaa bbb ccc http://www.ru ddd eee fff").should ==
+        "aaa bbb ccc <a href=\"http://www.ru\">http://www.ru</a> ddd eee fff"
+    end
+    it "in new line" do
+      parse("aaa bbb ccc\nhttp://www.ru\nddd eee fff").should match(
+        %r"aaa bbb ccc ?<br /> ?<a href=\"http://www.ru\">http://www.ru</a> ?<br /> ?ddd eee fff"
+      )
+    end
+    it "escapes '&' in link _text_" do
+      parse("http://www.ru/?a=1&b=2").should == "<a href=\"http://www.ru/?a=1&b=2\">http://www.ru/?a=1&amp;b=2</a>"
+    end
+    it "parses https://" do
+      parse("https://www.ru").should == "<a href=\"https://www.ru\">https://www.ru</a>"
+    end
+    %w', .'.each do |c|
+      it "stops parsing on \"#{c} \"" do
+        parse("http://www.ru#{c}").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
+        parse(" http://www.ru#{c} ").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c}"
+        parse(" http://www.ru#{c} hello!").should == "<a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
+        parse("xxx http://www.ru#{c} hello!").should == "xxx <a href=\"http://www.ru\">http://www.ru</a>#{c} hello!"
+        parse(" http://www.ru/#{c} hello!").should == "<a href=\"http://www.ru/\">http://www.ru/</a>#{c} hello!"
+        parse(" http://aaa.com#{c} http://bbb.com").should ==
+          "<a href=\"http://aaa.com\">http://aaa.com</a>#{c} <a href=\"http://bbb.com\">http://bbb.com</a>"
+      end
+    end
+  end
+###############################################################################
+  describe "#ticketNum ticket links" do
+    it "at the beginning" do
+      parse("#1234").should == '<a href="/spaces/test_space/tickets/1234">#1234</a>'
+    end
+    it "in middle of other words" do
+      parse("aaa bbb ccc #3476 ddd eee fff").should ==
+        'aaa bbb ccc <a href="/spaces/test_space/tickets/3476">#3476</a> ddd eee fff'
+    end
+    it "in new line" do
+      parse("aaa bbb ccc\n#1234\nddd eee fff").should match(
+        %r|aaa bbb ccc ?<br /> ?<a href="/spaces/test_space/tickets/1234">#1234</a> ?<br /> ?ddd eee fff|
+      )
+    end
+    it "ignores non-digits" do
+      parse("#1234d").should == '#1234d'
+      parse("#xxx").should == '#xxx'
+    end
+  end
+###############################################################################
+  describe "<pre><code>..</code></pre>" do
+    it "works" do
+      s = <<-EOF
+    for ( n = 0; n < max_size && \
+           (c = getc( yyin )) != EOF && c != '\\n'; ++n ) \
+      buf[n] = (char) c; \
+      EOF
+      parse("<pre><code>#{s.strip}</code></pre>").should ==
+        "<pre><code>#{h(s.strip)}</code></pre>"
+      s = <<-EOF
+          while ( 1 < 2 ) do
+            puts "<b>12345\\t54321</b>"
+            // *bold* comment
+            // _italic_ comment
+          end
+          ---
+          * aaa
+          * bbb
+          * ccc
+      EOF
+      parse("<pre><code>#{s.strip}</code></pre>").should ==
+        "<pre><code>#{h(s.strip)}</code></pre>"
+    end
+    it "not parses *bold*" do
+      s = "<pre><code> *bold*</code></pre>"
+      parse(s).should == s
+    end
+    it "not parses _italic_" do
+      s = "<pre><code> _italic_</code></pre>"
+      parse(s).should == s
+    end
+    it "not parses UL lists" do
+      s = "<pre><code>\n * l1\n * l2\n * l3</code></pre>"
+      parse(s).should == s.sub("<code>\n","<code>")
+    end
+    it "not parses OL lists" do
+      s = "<pre><code>\n # l1\n # l2\n # l3</code></pre>"
+      parse(s).should == s.sub("<code>\n","<code>")
+    end
+    it "not parses H1..H5" do
+      1.upto(5) do |i|
+        s = "<pre><code>\nh#{i}. zzzzzzz\n</code></pre>"
+        parse(s).should == "<pre><code>h#{i}. zzzzzzz</code></pre>"
+      end
+    end
+    it "not parses raw text links" do
+      s = "<pre><code>xxx http://www.ru yyy</code></pre>"
+      parse(s).should == s
+      s = "<pre><code>http://www.ru</code></pre>"
+      parse(s).should == s
+    end
+    it "keeps newlines" do
+      s = "<pre><code>aaa\nbbb</code></pre>"
+      parse(s).should == s
+      s = "<pre><code>aaa\n\nbbb\nccc</code></pre>"
+      parse(s).should == s
+    end
+    it "with no spaces between <pre> and <code>" do
+      s = "<pre><code>aaa</code></pre>"
+      parse(s).should == s
+    end
+    it "with spaces between <pre> and <code>" do
+      s = "<pre>    <code>aaa</code>           </pre>"
+      parse(s).should == s.tr(' ','')
+    end
+    it "with spaces between <pre> and <code> and inside" do
+      s = "<pre>    <code>  aaa   bbb   </code>           </pre>"
+      parse(s).should == "<pre><code>  aaa   bbb</code></pre>"
+    end
+    it "w/o closing tags" do
+      s = "<pre><code>aaa"
+      parse(s).should match(%r"<pre><code>aaa\n?</code></pre>")
+    end
+    it "in middle of text" do
+      s = "xxx <pre><code>yyyy</code></pre> jjj"
+      parse(s).should == s
+    end
+    it "with 2 instances" do
+      s = "xxx <pre><code>yyyy</code></pre> <jjj> <pre><code>asdkjaslkd</code></pre> END"
+      parse(s).should == s.sub('<jjj>','&lt;jjj&gt;')
+    end
+    it "works with unicode" do
+      s = "привет <pre><code> жжж </code></pre> пока!"
+      parse(s).should match(%r|привет ?<pre><code> жжж</code></pre> ?пока!|)
+      s = 'абвгдеёжзийклмнопрстуфхцчшщьыъэюя'
+      parse(s).should == s
+      s = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЬЫЪЭЮЯ'
+      parse(s).should == s
+      s = '☸☹☺☻☼☽☾☿'
+      parse(s).should == s
+    end
+    it "should escape lone closing tags" do
+      s = "</code></pre>"
+      parse(s).should == h(s)
+    end
+    it "should skip newlines and spaces at end" do
+      s = "<pre><code> aaa bbb ccc \n\n\n  \t\n\n\n\r\n\r\n   \t  </code></pre>"
+      parse(s).should == "<pre><code> aaa bbb ccc</code></pre>"
+    end
+    it "escapes html chars" do
+      HTML_ESCAPE.each do |k,v|
+        parse("<pre><code>#{k}</code></pre>").should == "<pre><code>#{v}</code></pre>"
+      end
+    end
+  end
+###############################################################################
+  describe "Assembla Links" do
+    a = {}
+    a["wiki:Name"]  = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
+    a["Name"]       = '<a class="wiki_link" title="Name" href="/wiki/show/test_space/Name">Name</a>'
+    a["Name#Ref"]   = '<a class="wiki_link" title="Name#Ref" href="/wiki/show/test_space/Name#Ref">Name#Ref</a>'
+    a["#Ref"]       = '<a href="#Ref" title="#Ref" class="wiki_link">#Ref</a>'
+    a["#привет"]    = %Q|<a href="##{hex_string("привет")}" title="#привет" class="wiki_link">#привет</a>|
+    a["#with  spc"] = %Q|<a href="#with__spc" title="#with  spc" class="wiki_link">#with  spc</a>|
+    a["#with__usc"] = %Q|<a href="#with__usc" title="#with__usc" class="wiki_link">#with__usc</a>|
+    a["#with--dsh"] = %Q|<a href="#with--dsh" title="#with--dsh" class="wiki_link">#with--dsh</a>|
+    a["#with!xclm"] = %Q|<a href="##{hex_string("with!xclm")}" title="#with!xclm" class="wiki_link">#with!xclm</a>|
+    a["#with&amp"]  = %Q|<a href="##{hex_string("with&amp")}" title="#with&amp" class="wiki_link">#with&amp;amp</a>|
+    a["ticket:234"] = '<a href="/spaces/test_space/tickets/234">#234</a>'
+    a["revision:1f4bdab77be696efd"] =
+      '<a href="http://code.assembla.com/test_space/git/changesets/1f4bdab77be696efd">revision:1f4bdab77be696efd</a>'
+    a["revision:12345"] =
+      '<a href="http://code.assembla.com/test_space/svn/changesets/12345">revision:12345</a>'
+    a["r:2345"] = '<a href="http://code.assembla.com/test_space/svn/changesets/2345">revision:2345</a>'
+    a["r:2345ef"] = '<a href="http://code.assembla.com/test_space/git/changesets/2345ef">revision:2345ef</a>'
+    a["url:http://www.ru"]   = '<a href="http://www.ru">http://www.ru</a>'
+    a["url:https://www.ru"]  = '<a href="https://www.ru">https://www.ru</a>'
+    a["url:www.ru"]          = '<a href="http://www.ru">http://www.ru</a>'
+    a["url:www.ru/?a=1&b=2"] = '<a href="http://www.ru/?a=1&b=2">http://www.ru/?a=1&amp;b=2</a>'
+    a["url:ftp://www.ru"]    = '<a href="ftp://www.ru">ftp://www.ru</a>'
+    a["url:/spaces/x2"]      = '<a href="/spaces/x2">/spaces/x2</a>'
+    a.each do |k,v|
+      it "parses [[#{k}]]" do
+        parse("[[#{k}]]").should == v
+      end
+      it "parses [[#{k}|привет тест]]" do
+        parse("[[#{k}|привет тест]]").should == v.sub(/>.*</,">привет тест<")
+      end
+      it "parses [[#{k}|test & here]]" do
+        parse("[[#{k}|test & here]]").should == v.sub(/>.*</,">test &amp; here<")
+      end
+    end
+    it "keeps unknown link types" do
+      s = "[[zzz:xxx]]"
+      parse(s).should == s
+      s = "[[abcd:1234]]"
+      parse(s).should == s
+      s = "[[abcd::1234]] [[abcd:1234]] [[uri:www.ru]]"
+      parse(s).should == s
+    end
+    it "links to ExistingFile.txt"
+    it "links to NotExistingFile.txt"
+    it "links to ExistingImage.png"
+    it "links to NotExistingImage.png"
+  end
+###############################################################################
+###############################################################################
+###############################################################################
+  HTML_ESCAPE   =   { '&' => '&amp;', '>' => '&gt;', '<' => '&lt;', '"' => '&quot;' } # the four characters escaped by h, mapped to their HTML entities
+  def h s # Returns s.to_s with &, ", > and < replaced by their HTML_ESCAPE entities.
+    s.to_s.gsub(/[&"><]/) { |special| HTML_ESCAPE[special] }
+  end
+  def parse s # Runs BreakoutParser.parse on s and returns the result with leading/trailing whitespace stripped.
+    BreakoutParser.parse(s).strip
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,71 @@
+--- !ruby/object:Gem::Specification
+name: breakout_parser
+version: !ruby/object:Gem::Version
+  version: 0.0.0
+platform: x86-mswin32
+authors:
+- Andrey "Zed" Zaikin
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2010-01-19 00:00:00 +05:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rspec
+  type: :development
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.2.9
+    version:
+description: BreakoutParser
+email: zed.0xff@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files:
+- LICENSE
+files:
+- .gitignore
+- LICENSE
+- Rakefile
+- VERSION
+- lib/breakout_parser.rb
+- lib/breakout_parser/win32-ruby1.8/breakout_parser.so
+- spec/parser_examples_spec.rb
+- spec/parser_spec.rb
+has_rdoc: true
+homepage: http://assembla.com
+licenses: []
+post_install_message:
+rdoc_options:
+- --charset=UTF-8
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project:
+rubygems_version: 1.3.5
+signing_key:
+specification_version: 3
+summary: BreakoutParser
+test_files:
+- spec/parser_spec.rb