RubyGems - wikiwah - Versions diffs - 0.0.1 - Mend

wikiwah 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

data/Rakefile +12 -0
data/lib/wikiwah/flow.rb +210 -0
data/lib/wikiwah/flow.rb~ +210 -0
data/lib/wikiwah/subst.rb +41 -0
data/lib/wikiwah/subst.rb~ +41 -0
data/lib/wikiwah/version.rb +3 -0
data/lib/wikiwah/version.rb~ +3 -0
data/lib/wikiwah.rb +103 -0
data/lib/wikiwah.rb~ +99 -0
data/test/wikiwah/flow_tests.rb +422 -0
data/test/wikiwah/flow_tests.rb~ +422 -0
data/test/wikiwah/subst_tests.rb +36 -0
data/test/wikiwah/subst_tests.rb~ +36 -0
data/test/wikiwah_tests.rb +63 -0
data/test/wikiwah_tests.rb~ +63 -0
metadata +93 -0

data/Rakefile ADDED Viewed

@@ -0,0 +1,12 @@
+require 'bundler'
+Bundler::GemHelper.install_tasks
+require 'rake/testtask'
+Rake::TestTask.new do |t|
+  t.libs << "lib"
+  t.test_files = FileList['test/**/*_tests.rb']
+end
+task :default => :test

data/lib/wikiwah/flow.rb ADDED Viewed

@@ -0,0 +1,210 @@
+#!/usr/bin/env ruby
+require 'cgi'                   # for escapeHTML
+class WikiWah
+  # Flow deals with block-level formatting in WikiWah.  Input text is split
+  # into paragraphs, separated by blank lines.  A list-item bullet also
+  # implies a new paragraph.
+  #
+  # Flow keeps track of the current level of indentation, and emits
+  # block-start and block-end tags (e.g. "<li>", "</li>") as required.
+  #
+  # Flow recognises the following types of blocks:
+  #
+  # - A line prefixed by "=" is a heading.  The heading-level is implied by
+  #   the number of "=" characters.
+  #
+  # - A line beginning with "*" or "-" is an unordered list item.
+  #
+  # - A line beginning with "1.", "(1)" or "#" is an ordered list item.
+  #
+  # - A paragraph prefixed by "|" is preformatted text (e.g. code)
+  #
+  # - A paragraph prefixed by ">" is a blockquote (ie. a citation)
+  #
+  # - Anything else is plain old body text.
+  #
+  class Flow
+    # Convert +input+ text to HTML.
+    #
+    # An optional +filter+ block may be provided, in which case it's
+    # applied to the body of each block.
+    def Flow.convert(input, &filter)
+      buff = ''
+      parser = Flow.new(buff,filter)
+      parser.process(input)
+      buff
+    end
+    # Patterns that start a new block
+    BlankRegexp = /\A *$/
+    BulletRegexp = Regexp.new('\A *([\*\-\#]|\d+\.|\(\d+\)) ')
+    def initialize(out, text_filter=null)
+      @out = out
+      @text_filter = text_filter
+      @context_stack = [TopContext]
+      @block_buffer = nil
+    end
+    # Process a multi-line input string
+    def process(input)
+      add_input(input)
+      flush_context_stack
+    end
+    private
+    # Process multi-line input
+    def add_input(input)
+      input.each do |line|
+        if (line =~ BlankRegexp)
+          start_new_block
+        else
+          if (line =~ BulletRegexp)
+            start_new_block
+          end
+          append_to_block(line)
+        end
+      end
+      start_new_block
+    end
+    # Append a line to the block
+    def append_to_block(line)
+      @block_buffer = (@block_buffer || '') + line
+    end
+    # Flush the buffered block
+    def start_new_block
+      if (@block_buffer)
+        add_block(@block_buffer)
+        @block_buffer = nil
+      end
+    end
+    # Add a block
+    def add_block(block)
+      case block
+      when /\A( *)- /       # unordered list item
+        push_context('ul',$1.size)
+        write_tag($', 'li')
+      when /\A( *)\* /       # unordered list item
+        push_context('ul class="sparse"',$1.size)
+        write_tag($', 'li')
+      when /\A( *)(\#|\d+\.|\(\d+\)) / # ordered list item
+        push_context('ol',$1.size)
+        write_tag($', 'li')
+      when /\A( *)% /       # unordered list item
+        push_context('dl',$1.size)
+        write_tag($', 'dt')
+      when /\A(( *)> )/           # citation
+        push_context('blockquote',$2.size)
+        block = strip_prefix($1, block)
+        write_text(block)
+      when /\A(( *)\| )/          # preformatted (explicit)
+        push_context('pre',$2.size)
+        block = strip_prefix($1, block)
+        write_html(CGI.escapeHTML(block))
+      when /\A( *)(=+) /        # heading
+        flush_context_stack
+        write_tag($', "h#{$2.size}")
+      when /\A( *)/        	# body text
+        tag = \
+        if $1 == ""
+          'p'
+        elsif context.tag == 'dl'
+          'dd'
+        else
+          'blockquote'
+        end
+        push_context(tag,$1.size,true)
+        block = strip_prefix($1, block)
+        write_text(block)
+      end
+    end
+    def strip_prefix(prefix, text)
+      pattern = '^' + Regexp.quote(prefix)
+      pattern.sub!(/\\ $/, '( |$)')
+      regexp = Regexp.new(pattern)
+      text.gsub(regexp, '')
+    end
+    # Write a balanced tag
+    def write_tag(content, tag)
+      write_html("<#{tag}>\n")
+      write_text(content)
+      write_html("</#{tag}>\n")
+    end
+    # Write HTML markup
+    def write_html(html)
+      @out << html
+    end
+    # Write text content, performing any necessary substitutions
+    def write_text(text)
+      if (@text_filter)
+        text = @text_filter.call(text)
+      end
+      @out << text
+    end
+    Context = Struct.new('Context', :tag, :level)
+    TopContext = Context.new(:top, -1)
+    # Get the current Context
+    def context
+      @context_stack.last
+    end
+    # Push a new Context on the stack
+    def push_context(tag_with_arguments, level, separate_same=false)
+      match = %r{^(\w+)(.*)$}.match(tag_with_arguments)
+      tag = match[1]
+      arguments = match[2]
+      original_level = context.level
+      pop_context_to_level(level)
+      if (context.level == level)
+        if (context.tag != tag)
+          pop_context
+        elsif (separate_same)
+          write_html("</#{tag}>\n")
+          write_html("<#{tag}#{arguments}>\n")
+        end
+      end
+      if (context.level < level)
+        write_html("<#{tag}#{arguments}>\n")
+        @context_stack << Context.new(tag,level)
+      end
+    end
+    # Pop topmost Context from the stack
+    def pop_context
+      if (context == TopContext)
+        raise "can't pop at top"
+      end
+      cxt = @context_stack.pop
+      write_html("</#{cxt.tag}>\n")
+    end
+    def pop_context_to_level(level)
+      while (context.level > level)
+        pop_context
+      end
+    end
+    # Pop all Contexts from the stack
+    def flush_context_stack
+      while (context != TopContext)
+        pop_context
+      end
+    end
+  end
+end

data/lib/wikiwah/flow.rb~ ADDED Viewed

@@ -0,0 +1,210 @@
+#!/usr/bin/env ruby
+require 'cgi'                   # for escapeHTML
+class WikiWah
+  # Flow deals with block-level formatting in WikiWah.  Input text is split
+  # into paragraphs, separated by blank lines.  A list-item bullet also
+  # implies a new paragraph.
+  #
+  # Flow keeps track of the current level of indentation, and emits
+  # block-start and block-end tags (e.g. "<li>", "</li>") as required.
+  #
+  # Flow recognises the following types of blocks:
+  #
+  # - A line prefixed by "=" is a heading.  The heading-level is implied by
+  #   the number of "=" characters.
+  #
+  # - A line beginning with "*" or "-" is an unordered list item.
+  #
+  # - A line beginning with "1.", "(1)" or "#" is an ordered list item.
+  #
+  # - A paragraph prefixed by "|" is preformatted text (e.g. code)
+  #
+  # - A paragraph prefixed by ">" is a blockquote (ie. a citation)
+  #
+  # - Anything else is plain old body text.
+  #
+  class Flow
+    # Convert +input+ text to HTML.
+    #
+    # An optional +filter+ block may be provided, in which case it's
+    # applied to the body of each block.
+    def Flow.convert(input, &filter)
+      buff = ''
+      parser = Flow.new(buff,filter)
+      parser.process(input)
+      buff
+    end
+    # Patterns that start a new block
+    BlankRegexp = /\A *$/
+    BulletRegexp = Regexp.new('\A *([\*\-\#]|\d+\.|\(\d+\)) ')
+    def initialize(out, text_filter=null)
+      @out = out
+      @text_filter = text_filter
+      @context_stack = [TopContext]
+      @block_buffer = nil
+    end
+    # Process a multi-line input string
+    def process(input)
+      add_input(input)
+      flush_context_stack
+    end
+    private
+    # Process multi-line input
+    def add_input(input)
+      input.each do |line|
+        if (line =~ BlankRegexp)
+          start_new_block
+        else
+          if (line =~ BulletRegexp)
+            start_new_block
+          end
+          append_to_block(line)
+        end
+      end
+      start_new_block
+    end
+    # Append a line to the block
+    def append_to_block(line)
+      @block_buffer = (@block_buffer || '') + line
+    end
+    # Flush the buffered block
+    def start_new_block
+      if (@block_buffer)
+        add_block(@block_buffer)
+        @block_buffer = nil
+      end
+    end
+    # Add a block
+    def add_block(block)
+      case block
+      when /\A( *)- /       # unordered list item
+        push_context('ul',$1.size)
+        write_tag($', 'li')
+      when /\A( *)\* /       # unordered list item
+        push_context('ul class="sparse"',$1.size)
+        write_tag($', 'li')
+      when /\A( *)(\#|\d+\.|\(\d+\)) / # ordered list item
+        push_context('ol',$1.size)
+        write_tag($', 'li')
+      when /\A( *)% /       # unordered list item
+        push_context('dl',$1.size)
+        write_tag($', 'dt')
+      when /\A(( *)> )/           # citation
+        push_context('blockquote',$2.size)
+        block = strip_prefix($1, block)
+        write_text(block)
+      when /\A(( *)\| )/          # preformatted (explicit)
+        push_context('pre',$2.size)
+        block = strip_prefix($1, block)
+        write_html(CGI.escapeHTML(block))
+      when /\A( *)(=+) /        # heading
+        flush_context_stack
+        write_tag($', "h#{$2.size}")
+      when /\A( *)/        	# body text
+        tag = \
+        if $1 == ""
+          'p'
+        elsif context.tag == 'dl'
+          'dd'
+        else
+          'blockquote'
+        end
+        push_context(tag,$1.size,true)
+        block = strip_prefix($1, block)
+        write_text(block)
+      end
+    end
+    def strip_prefix(prefix, text)
+      pattern = '^' + Regexp.quote(prefix)
+      pattern.sub!(/\\ $/, '( |$)')
+      regexp = Regexp.new(pattern)
+      text.gsub(regexp, '')
+    end
+    # Write a balanced tag
+    def write_tag(content, tag)
+      write_html("<#{tag}>\n")
+      write_text(content)
+      write_html("</#{tag}>\n")
+    end
+    # Write HTML markup
+    def write_html(html)
+      @out << html
+    end
+    # Write text content, performing any necessary substitutions
+    def write_text(text)
+      if (@text_filter)
+        text = @text_filter.call(text)
+      end
+      @out << text
+    end
+    Context = Struct.new('Context', :tag, :level)
+    TopContext = Context.new(:top, -1)
+    # Get the current Context
+    def context
+      @context_stack.last
+    end
+    # Push a new Context on the stack
+    def push_context(tag_with_arguments, level, separate_same=false)
+      match = %r{^(\w+)(.*)$}.match(tag_with_arguments)
+      tag = match[1]
+      arguments = match[2]
+      original_level = context.level
+      pop_context_to_level(level)
+      if (context.level == level)
+        if (context.tag != tag)
+          pop_context
+        elsif (separate_same)
+          write_html("</#{tag}>\n")
+          write_html("<#{tag}#{arguments}>\n")
+        end
+      end
+      if (context.level < level)
+        write_html("<#{tag}#{arguments}>\n")
+        @context_stack << Context.new(tag,level)
+      end
+    end
+    # Pop topmost Context from the stack
+    def pop_context
+      if (context == TopContext)
+        raise "can't pop at top"
+      end
+      cxt = @context_stack.pop
+      write_html("</#{cxt.tag}>\n")
+    end
+    def pop_context_to_level(level)
+      while (context.level > level)
+        pop_context
+      end
+    end
+    # Pop all Contexts from the stack
+    def flush_context_stack
+      while (context != TopContext)
+        pop_context
+      end
+    end
+  end
+end

data/lib/wikiwah/subst.rb ADDED Viewed

@@ -0,0 +1,41 @@
+#!/usr/bin/env ruby
+class WikiWah
+  # Subst handles text-transformation using a series of regular-expression
+  # substitutions.  It encapsulates a number of "patterns", and associated
+  # blocks.  Each block is invoked with a MatchData object when it's
+  # associated pattern matches, and is expected to return a replacement
+  # string.
+  #
+  # The difference between using Subst and applying a series of gsub's is
+  # that replacement values are protected from subsequent transformations.
+  class Subst
+    def initialize
+      @transforms = []
+    end
+    def add_transformation(regexp, &proc)
+      @transforms << [regexp, proc]
+    end
+    def transform(s)
+      s = s.dup
+      store = []
+      @transforms.each do |transform|
+        (regexp, proc) = *transform
+        s.gsub!(regexp) {
+          store << proc.call($~)
+          "\001#{store.size - 1}\002"
+        }
+      end
+      s.gsub!(/\001(\d+)\002/) {
+        store[$1.to_i]
+      }
+      s
+    end
+  end
+end

data/lib/wikiwah/subst.rb~ ADDED Viewed

@@ -0,0 +1,41 @@
+#!/usr/bin/env ruby
+class WikiWah
+  # Subst handles text-transformation using a series of regular-expression
+  # substitutions.  It encapsulates a number of "patterns", and associated
+  # blocks.  Each block is invoked with a MatchData object when it's
+  # associated pattern matches, and is expected to return a replacement
+  # string.
+  #
+  # The difference between using Subst and applying a series of gsub's is
+  # that replacement values are protected from subsequent transformations.
+  class Subst
+    def initialize
+      @transforms = []
+    end
+    def add_transformation(regexp, &proc)
+      @transforms << [regexp, proc]
+    end
+    def transform(s)
+      s = s.dup
+      store = []
+      @transforms.each do |transform|
+        (regexp, proc) = *transform
+        s.gsub!(regexp) {
+          store << proc.call($~)
+          "\001#{store.size - 1}\002"
+        }
+      end
+      s.gsub!(/\001(\d+)\002/) {
+        store[$1.to_i]
+      }
+      s
+    end
+  end
+end

data/lib/wikiwah/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+class WikiWah
+  VERSION = "0.0.1"
+end

data/lib/wikiwah/version.rb~ ADDED Viewed

@@ -0,0 +1,3 @@
+# NOTE(review): the class is spelled "Wikiwah" here, whereas the other
+# files in this package spell it "WikiWah" — confirm which is intended.
+class Wikiwah
+  # Version string.
+  VERSION = "0.0.1"
+end

data/lib/wikiwah.rb ADDED Viewed

@@ -0,0 +1,103 @@
+require 'wikiwah/flow'
+require 'wikiwah/subst'
+require 'wikiwah/version'
+# A formatter for turning Wiki-esque text into HTML.
+#
+# = Block-level markup
+#
+# - A line prefixed by "=" is a heading.  The heading-level is implied by
+#   the number of "=" characters.
+#
+# - A line beginning with "*" or "-" is an unordered list item.
+#
+# - A line beginning with "1.", "(1)" or "#" is an ordered list item.
+#
+# - A paragraph prefixed by "|" is preformatted text (e.g. code)
+#
+# - A paragraph prefixed by ">" is a blockquote (ie. a citation)
+#
+# = Text markup
+#
+# - HTML tags are rendered verbatim.
+#
+# - Text may by marked *bold*, /italic/, _underlined_, +monospace+
+#
+# - Text may be quoted with '{{{' and '}}}'
+#
+# - URLs turn into links.
+#
+# - "{LOCATION}" creates a link to LOCATION.
+#
+# - "{LABEL}@LOCATION" creates a link to LOCATION, with the specified
+#   LABEL.
+#
+class WikiWah
+  attr_writer :link_translator
+  def initialize
+    @link_translator = proc do |link| link end
+    init_transformer
+  end
+  # Convert WikiWah text to HTML.
+  def to_html(text)
+    Flow.convert(text) do |paragraph|
+      @transformer.transform(paragraph)
+    end
+  end
+  def self.to_html(text)
+    self.new.to_html(text)
+  end
+  private
+  def translate_link(link)
+    @link_translator.call(link)
+  end
+  def init_transformer
+    @transformer = WikiWah::Subst.new
+    @transformer.add_transformation(/""(.+)""/) do |match|
+      # Double-double-quoted
+      CGI.escapeHTML(match[1])
+    end
+    @transformer.add_transformation(/\\(.)/) do |match|
+      # Backslash-quoted
+      match[1]
+    end
+    @transformer.add_transformation(/\<(.+?)\>/m) do |match|
+      # In-line HTML
+      match[0]
+    end
+    @transformer.add_transformation(/\{(.+?)\}(@(\S*[\w\/]))?/m) do |match|
+      # Distinuished link
+      label = @transformer.transform(match[1])
+      location = translate_link(match[3] || match[1])
+      if location
+        "<a href='#{location}'>#{label}</a>"
+      else
+        "{#{label}}"
+      end
+    end
+    @transformer.add_transformation(/\b[a-z]+:[\w\/]\S*[\w\/]/) do |match|
+      # URL
+      "<a href='#{match[0]}'>#{match[0]}</a>"
+    end
+    @transformer.add_transformation(%r[(^|\W)([*+_/])([*+_/]*\w.*?\w[*+_/]*)\2(?!\w)]) do |match|
+      # Bold/italic/etc.
+      tag = case match[2]
+            when '*'; 'strong'
+            when '+'; 'tt'
+            when '/'; 'em'
+            when '_'; 'u'
+            end
+      content = @transformer.transform(match[3])
+      (match[1] + '<' + tag + '>' + content + '</' + tag + '>')
+    end
+  end
+end