RubyGems - wikiwah - Versions diffs - 0.0.1 - Mend

wikiwah 0.0.1

Files changed (16) hide show

data/Rakefile +12 -0
data/lib/wikiwah/flow.rb +210 -0
data/lib/wikiwah/flow.rb~ +210 -0
data/lib/wikiwah/subst.rb +41 -0
data/lib/wikiwah/subst.rb~ +41 -0
data/lib/wikiwah/version.rb +3 -0
data/lib/wikiwah/version.rb~ +3 -0
data/lib/wikiwah.rb +103 -0
data/lib/wikiwah.rb~ +99 -0
data/test/wikiwah/flow_tests.rb +422 -0
data/test/wikiwah/flow_tests.rb~ +422 -0
data/test/wikiwah/subst_tests.rb +36 -0
data/test/wikiwah/subst_tests.rb~ +36 -0
data/test/wikiwah_tests.rb +63 -0
data/test/wikiwah_tests.rb~ +63 -0
metadata +93 -0

data/Rakefile ADDED Viewed

@@ -0,0 +1,12 @@
+require 'bundler'
+Bundler::GemHelper.install_tasks
+require 'rake/testtask'
+Rake::TestTask.new do |t|
+  t.libs << "lib"
+  t.test_files = FileList['test/**/*_tests.rb']
+end
+task :default => :test

data/lib/wikiwah/flow.rb ADDED Viewed

@@ -0,0 +1,210 @@
+#!/usr/bin/env ruby
+require 'cgi'                   # for escapeHTML
+class WikiWah
+  # Flow deals with block-level formatting in WikiWah.  Input text is split
+  # into paragraphs, separated by blank lines.  A list-item bullet also
+  # implies a new paragraph.
+  #
+  # Flow keeps track of the current level of indentation, and emits
+  # block-start and block-end tags (e.g. "<li>", "</li>") as required.
+  #
+  # Flow recognises the following types of blocks:
+  #
+  # - A line prefixed by "=" is a heading.  The heading-level is implied by
+  #   the number of "=" characters.
+  #
+  # - A line beginning with "*" or "-" is an unordered list item.
+  #
+  # - A line beginning with "1.", "(1)" or "#" is an ordered list item.
+  #
+  # - A paragraph prefixed by "|" is preformatted text (e.g. code)
+  #
+  # - A paragraph prefixed by ">" is a blockquote (ie. a citation)
+  #
+  # - Anything else is plain old body text.
+  #
+  class Flow
+    # Convert +input+ text to HTML.
+    #
+    # An optional +filter+ block may be provided, in which case it's
+    # applied to the body of each block.
+    def Flow.convert(input, &filter)
+      buff = ''
+      parser = Flow.new(buff,filter)
+      parser.process(input)
+      buff
+    end
+    # Patterns that start a new block
+    BlankRegexp = /\A *$/
+    BulletRegexp = Regexp.new('\A *([\*\-\#]|\d+\.|\(\d+\)) ')
+    def initialize(out, text_filter=null)
+      @out = out
+      @text_filter = text_filter
+      @context_stack = [TopContext]
+      @block_buffer = nil
+    end
+    # Process a multi-line input string
+    def process(input)
+      add_input(input)
+      flush_context_stack
+    end
+    private
+    # Process multi-line input
+    def add_input(input)
+      input.each do |line|
+        if (line =~ BlankRegexp)
+          start_new_block
+        else
+          if (line =~ BulletRegexp)
+            start_new_block
+          end
+          append_to_block(line)
+        end
+      end
+      start_new_block
+    end
+    # Append a line to the block
+    def append_to_block(line)
+      @block_buffer = (@block_buffer || '') + line
+    end
+    # Flush the buffered block
+    def start_new_block
+      if (@block_buffer)
+        add_block(@block_buffer)
+        @block_buffer = nil
+      end
+    end
+    # Add a block
+    def add_block(block) # emit one buffered block as HTML, dispatching on its leading marker
+      case block
+      when /\A( *)- /       # unordered list item (plain <ul>)
+        push_context('ul',$1.size)
+        write_tag($', 'li')
+      when /\A( *)\* /       # unordered list item, rendered in <ul class="sparse">
+        push_context('ul class="sparse"',$1.size)
+        write_tag($', 'li')
+      when /\A( *)(\#|\d+\.|\(\d+\)) / # ordered list item
+        push_context('ol',$1.size)
+        write_tag($', 'li')
+      when /\A( *)% /       # definition-list term (<dt> inside <dl>)
+        push_context('dl',$1.size)
+        write_tag($', 'dt')
+      when /\A(( *)> )/           # citation
+        push_context('blockquote',$2.size)
+        block = strip_prefix($1, block)
+        write_text(block)
+      when /\A(( *)\| )/          # preformatted (explicit); HTML-escaped, not passed through the text filter
+        push_context('pre',$2.size)
+        block = strip_prefix($1, block)
+        write_html(CGI.escapeHTML(block))
+      when /\A( *)(=+) /        # heading; closes every open context first
+        flush_context_stack
+        write_tag($', "h#{$2.size}")
+      when /\A( *)/        	# body text (this pattern always matches, so it is the fallback)
+        tag = \
+        if $1 == ""
+          'p'
+        elsif context.tag == 'dl'
+          'dd'
+        else
+          'blockquote'
+        end
+        push_context(tag,$1.size,true)
+        block = strip_prefix($1, block)
+        write_text(block)
+      end
+    end
+    def strip_prefix(prefix, text)
+      pattern = '^' + Regexp.quote(prefix)
+      pattern.sub!(/\\ $/, '( |$)')
+      regexp = Regexp.new(pattern)
+      text.gsub(regexp, '')
+    end
+    # Write a balanced tag
+    def write_tag(content, tag)
+      write_html("<#{tag}>\n")
+      write_text(content)
+      write_html("</#{tag}>\n")
+    end
+    # Write HTML markup
+    def write_html(html)
+      @out << html
+    end
+    # Write text content, performing any necessary substitutions
+    def write_text(text)
+      if (@text_filter)
+        text = @text_filter.call(text)
+      end
+      @out << text
+    end
+    Context = Struct.new('Context', :tag, :level)
+    TopContext = Context.new(:top, -1)
+    # Get the current Context
+    def context
+      @context_stack.last
+    end
+    # Push a new Context on the stack
+    def push_context(tag_with_arguments, level, separate_same=false)
+      match = %r{^(\w+)(.*)$}.match(tag_with_arguments)
+      tag = match[1]
+      arguments = match[2]
+      original_level = context.level
+      pop_context_to_level(level)
+      if (context.level == level)
+        if (context.tag != tag)
+          pop_context
+        elsif (separate_same)
+          write_html("</#{tag}>\n")
+          write_html("<#{tag}#{arguments}>\n")
+        end
+      end
+      if (context.level < level)
+        write_html("<#{tag}#{arguments}>\n")
+        @context_stack << Context.new(tag,level)
+      end
+    end
+    # Pop topmost Context from the stack
+    def pop_context
+      if (context == TopContext)
+        raise "can't pop at top"
+      end
+      cxt = @context_stack.pop
+      write_html("</#{cxt.tag}>\n")
+    end
+    def pop_context_to_level(level)
+      while (context.level > level)
+        pop_context
+      end
+    end
+    # Pop all Contexts from the stack
+    def flush_context_stack
+      while (context != TopContext)
+        pop_context
+      end
+    end
+  end
+end

data/lib/wikiwah/flow.rb~ ADDED Viewed

@@ -0,0 +1,210 @@
+#!/usr/bin/env ruby
+require 'cgi'                   # for escapeHTML
+class WikiWah
+  # Flow deals with block-level formatting in WikiWah.  Input text is split
+  # into paragraphs, separated by blank lines.  A list-item bullet also
+  # implies a new paragraph.
+  #
+  # Flow keeps track of the current level of indentation, and emits
+  # block-start and block-end tags (e.g. "<li>", "</li>") as required.
+  #
+  # Flow recognises the following types of blocks:
+  #
+  # - A line prefixed by "=" is a heading.  The heading-level is implied by
+  #   the number of "=" characters.
+  #
+  # - A line beginning with "*" or "-" is an unordered list item.
+  #
+  # - A line beginning with "1.", "(1)" or "#" is an ordered list item.
+  #
+  # - A paragraph prefixed by "|" is preformatted text (e.g. code)
+  #
+  # - A paragraph prefixed by ">" is a blockquote (ie. a citation)
+  #
+  # - Anything else is plain old body text.
+  #
+  class Flow
+    # Convert +input+ text to HTML.
+    #
+    # An optional +filter+ block may be provided, in which case it's
+    # applied to the body of each block.
+    def Flow.convert(input, &filter)
+      buff = ''
+      parser = Flow.new(buff,filter)
+      parser.process(input)
+      buff
+    end
+    # Patterns that start a new block
+    BlankRegexp = /\A *$/
+    BulletRegexp = Regexp.new('\A *([\*\-\#]|\d+\.|\(\d+\)) ')
+    def initialize(out, text_filter=null)
+      @out = out
+      @text_filter = text_filter
+      @context_stack = [TopContext]
+      @block_buffer = nil
+    end
+    # Process a multi-line input string
+    def process(input)
+      add_input(input)
+      flush_context_stack
+    end
+    private
+    # Process multi-line input
+    def add_input(input)
+      input.each do |line|
+        if (line =~ BlankRegexp)
+          start_new_block
+        else
+          if (line =~ BulletRegexp)
+            start_new_block
+          end
+          append_to_block(line)
+        end
+      end
+      start_new_block
+    end
+    # Append a line to the block
+    def append_to_block(line)
+      @block_buffer = (@block_buffer || '') + line
+    end
+    # Flush the buffered block
+    def start_new_block
+      if (@block_buffer)
+        add_block(@block_buffer)
+        @block_buffer = nil
+      end
+    end
+    # Add a block
+    def add_block(block)
+      case block
+      when /\A( *)- /       # unordered list item
+        push_context('ul',$1.size)
+        write_tag($', 'li')
+      when /\A( *)\* /       # unordered list item
+        push_context('ul class="sparse"',$1.size)
+        write_tag($', 'li')
+      when /\A( *)(\#|\d+\.|\(\d+\)) / # ordered list item
+        push_context('ol',$1.size)
+        write_tag($', 'li')
+      when /\A( *)% /       # unordered list item
+        push_context('dl',$1.size)
+        write_tag($', 'dt')
+      when /\A(( *)> )/           # citation
+        push_context('blockquote',$2.size)
+        block = strip_prefix($1, block)
+        write_text(block)
+      when /\A(( *)\| )/          # preformatted (explicit)
+        push_context('pre',$2.size)
+        block = strip_prefix($1, block)
+        write_html(CGI.escapeHTML(block))
+      when /\A( *)(=+) /        # heading
+        flush_context_stack
+        write_tag($', "h#{$2.size}")
+      when /\A( *)/        	# body text
+        tag = \
+        if $1 == ""
+          'p'
+        elsif context.tag == 'dl'
+          'dd'
+        else
+          'blockquote'
+        end
+        push_context(tag,$1.size,true)
+        block = strip_prefix($1, block)
+        write_text(block)
+      end
+    end
+    def strip_prefix(prefix, text)
+      pattern = '^' + Regexp.quote(prefix)
+      pattern.sub!(/\\ $/, '( |$)')
+      regexp = Regexp.new(pattern)
+      text.gsub(regexp, '')
+    end
+    # Write a balanced tag
+    def write_tag(content, tag)
+      write_html("<#{tag}>\n")
+      write_text(content)
+      write_html("</#{tag}>\n")
+    end
+    # Write HTML markup
+    def write_html(html)
+      @out << html
+    end
+    # Write text content, performing any necessary substitutions
+    def write_text(text)
+      if (@text_filter)
+        text = @text_filter.call(text)
+      end
+      @out << text
+    end
+    Context = Struct.new('Context', :tag, :level)
+    TopContext = Context.new(:top, -1)
+    # Get the current Context
+    def context
+      @context_stack.last
+    end
+    # Push a new Context on the stack
+    def push_context(tag_with_arguments, level, separate_same=false)
+      match = %r{^(\w+)(.*)$}.match(tag_with_arguments)
+      tag = match[1]
+      arguments = match[2]
+      original_level = context.level
+      pop_context_to_level(level)
+      if (context.level == level)
+        if (context.tag != tag)
+          pop_context
+        elsif (separate_same)
+          write_html("</#{tag}>\n")
+          write_html("<#{tag}#{arguments}>\n")
+        end
+      end
+      if (context.level < level)
+        write_html("<#{tag}#{arguments}>\n")
+        @context_stack << Context.new(tag,level)
+      end
+    end
+    # Pop topmost Context from the stack
+    def pop_context
+      if (context == TopContext)
+        raise "can't pop at top"
+      end
+      cxt = @context_stack.pop
+      write_html("</#{cxt.tag}>\n")
+    end
+    def pop_context_to_level(level)
+      while (context.level > level)
+        pop_context
+      end
+    end
+    # Pop all Contexts from the stack
+    def flush_context_stack
+      while (context != TopContext)
+        pop_context
+      end
+    end
+  end
+end

data/lib/wikiwah/subst.rb ADDED Viewed

@@ -0,0 +1,41 @@
+#!/usr/bin/env ruby
+class WikiWah
+  # Subst handles text-transformation using a series of regular-expression
+  # substitutions.  It encapsulates a number of "patterns", and associated
+  # blocks.  Each block is invoked with a MatchData object when it's
+  # associated pattern matches, and is expected to return a replacement
+  # string.
+  #
+  # The difference between using Subst and applying a series of gsub's is
+  # that replacement values are protected from subsequent transformations.
+  class Subst
+    def initialize
+      @transforms = []
+    end
+    def add_transformation(regexp, &proc)
+      @transforms << [regexp, proc]
+    end
+    def transform(s)
+      s = s.dup
+      store = []
+      @transforms.each do |transform|
+        (regexp, proc) = *transform
+        s.gsub!(regexp) {
+          store << proc.call($~)
+          "\001#{store.size - 1}\002"
+        }
+      end
+      s.gsub!(/\001(\d+)\002/) {
+        store[$1.to_i]
+      }
+      s
+    end
+  end
+end

data/lib/wikiwah/subst.rb~ ADDED Viewed

@@ -0,0 +1,41 @@
+#!/usr/bin/env ruby
+class WikiWah
+  # Subst handles text-transformation using a series of regular-expression
+  # substitutions.  It encapsulates a number of "patterns", and associated
+  # blocks.  Each block is invoked with a MatchData object when it's
+  # associated pattern matches, and is expected to return a replacement
+  # string.
+  #
+  # The difference between using Subst and applying a series of gsub's is
+  # that replacement values are protected from subsequent transformations.
+  class Subst
+    def initialize
+      @transforms = []
+    end
+    def add_transformation(regexp, &proc)
+      @transforms << [regexp, proc]
+    end
+    def transform(s)
+      s = s.dup
+      store = []
+      @transforms.each do |transform|
+        (regexp, proc) = *transform
+        s.gsub!(regexp) {
+          store << proc.call($~)
+          "\001#{store.size - 1}\002"
+        }
+      end
+      s.gsub!(/\001(\d+)\002/) {
+        store[$1.to_i]
+      }
+      s
+    end
+  end
+end

data/lib/wikiwah/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+class WikiWah
+  VERSION = "0.0.1"
+end

data/lib/wikiwah/version.rb~ ADDED Viewed

@@ -0,0 +1,3 @@
+class Wikiwah
+  VERSION = "0.0.1"
+end

data/lib/wikiwah.rb ADDED Viewed

@@ -0,0 +1,103 @@
+require 'wikiwah/flow'
+require 'wikiwah/subst'
+require 'wikiwah/version'
+# A formatter for turning Wiki-esque text into HTML.
+#
+# = Block-level markup
+#
+# - A line prefixed by "=" is a heading.  The heading-level is implied by
+#   the number of "=" characters.
+#
+# - A line beginning with "*" or "-" is an unordered list item.
+#
+# - A line beginning with "1.", "(1)" or "#" is an ordered list item.
+#
+# - A paragraph prefixed by "|" is preformatted text (e.g. code)
+#
+# - A paragraph prefixed by ">" is a blockquote (ie. a citation)
+#
+# = Text markup
+#
+# - HTML tags are rendered verbatim.
+#
+# - Text may be marked *bold*, /italic/, _underlined_, +monospace+
+#
+# - Text may be quoted with '{{{' and '}}}'
+#
+# - URLs turn into links.
+#
+# - "{LOCATION}" creates a link to LOCATION.
+#
+# - "{LABEL}@LOCATION" creates a link to LOCATION, with the specified
+#   LABEL.
+#
+class WikiWah
+  attr_writer :link_translator
+  def initialize
+    @link_translator = proc do |link| link end
+    init_transformer
+  end
+  # Convert WikiWah text to HTML.
+  def to_html(text)
+    Flow.convert(text) do |paragraph|
+      @transformer.transform(paragraph)
+    end
+  end
+  def self.to_html(text)
+    self.new.to_html(text)
+  end
+  private
+  def translate_link(link)
+    @link_translator.call(link)
+  end
+  def init_transformer # build the Subst pipeline; transformations are registered in the order they are applied
+    @transformer = WikiWah::Subst.new
+    @transformer.add_transformation(/""(.+)""/) do |match|
+      # Double-double-quoted: content is HTML-escaped
+      CGI.escapeHTML(match[1])
+    end
+    @transformer.add_transformation(/\\(.)/) do |match|
+      # Backslash-quoted: the escaped character is emitted as-is
+      match[1]
+    end
+    @transformer.add_transformation(/\<(.+?)\>/m) do |match|
+      # In-line HTML: passed through verbatim
+      match[0]
+    end
+    @transformer.add_transformation(/\{(.+?)\}(@(\S*[\w\/]))?/m) do |match|
+      # Distinguished link: "{LABEL}" or "{LABEL}@LOCATION"
+      label = @transformer.transform(match[1])
+      location = translate_link(match[3] || match[1])
+      if location
+        "<a href='#{location}'>#{label}</a>"
+      else
+        "{#{label}}"
+      end
+    end
+    @transformer.add_transformation(/\b[a-z]+:[\w\/]\S*[\w\/]/) do |match|
+      # URL: linked to itself
+      "<a href='#{match[0]}'>#{match[0]}</a>"
+    end
+    @transformer.add_transformation(%r[(^|\W)([*+_/])([*+_/]*\w.*?\w[*+_/]*)\2(?!\w)]) do |match|
+      # Bold/italic/etc.: the delimiter character selects the tag
+      tag = case match[2]
+            when '*'; 'strong'
+            when '+'; 'tt'
+            when '/'; 'em'
+            when '_'; 'u'
+            end
+      content = @transformer.transform(match[3])
+      (match[1] + '<' + tag + '>' + content + '</' + tag + '>')
+    end
+  end
+end