RubyGems - rpeg-markdown - Versions diffs - 0.1.0 → 0.2.0 - Mend

rpeg-markdown 0.1.0 → 0.2.0

Files changed (103) hide show

data/README CHANGED Viewed

@@ -7,7 +7,31 @@ Markdown (see [peg-markdown][1]) in a Ruby extension.
 [1]: http://github.com/jgm/peg-markdown/
    "John MacFarlane's peg-markdown project"
-### COPYING
+Installation/Hacking
+--------------------
+This library requires a recent version of glib2.
+The rpeg-markdown gem is available from Rubyforge:
+  $ sudo gem install rpeg-markdown
+A Git repository is available for hacking:
+  $ git clone git://github.com/rtomayko/rpeg-markdown.git
+  $ cd rpeg-markdown
+  $ rake test
+Patches happily accepted via fork or email.
+Changes
+-------
+0.2.0 / 2008-07-12 - Adds test suite and plugs all memory leaks.
+0.1.0 / 2008-05-30 - Initial release.
+COPYING
+-------
 The peg-markdown sources are licensed under the GPL and the Ruby extension
 sources adopt this license. See the file LICENSE included with this

data/Rakefile CHANGED Viewed

@@ -2,21 +2,28 @@ require 'rake/clean'
 require 'rake/packagetask'
 require 'rake/gempackagetask'
+task :default => :test
 DLEXT = Config::CONFIG['DLEXT']
-VERS = '0.1.0'
+VERS = '0.2.0'
 spec =
   Gem::Specification.new do |s|
     s.name              = "rpeg-markdown"
     s.version           = VERS
     s.summary           = "Ruby extension library for peg-markdown"
-    s.files             = FileList['README','LICENSE','Rakefile','test.rb','{lib,ext}/**.rb','ext/*.{c,h}','bin/rpeg-markdown']
+    s.files             = FileList[
+                            'README','LICENSE','Rakefile',
+                            '{lib,ext,test}/**.rb','ext/*.{c,h}',
+                            'test/MarkdownTest*/**/*',
+                            'bin/rpeg-markdown'
+                          ]
     s.bindir            = 'bin'
     s.executables       << 'rpeg-markdown'
     s.require_path      = 'lib'
     s.has_rdoc          = true
     s.extra_rdoc_files  = ['README', 'LICENSE']
-    s.test_files        = Dir['test.rb']
+    s.test_files        = FileList['test/markdown_test.rb']
     s.extensions        = ['ext/extconf.rb']
     s.author            = 'Ryan Tomayko'
@@ -42,19 +49,29 @@ namespace :submodule do
     end
   end
+  desc 'Update the peg-markdown submodule'
   task :update => :init do
-    sh 'git submodule update peg-markdown'
+    sh 'git submodule update peg-markdown' unless File.symlink?('peg-markdown')
+  end
+  file 'peg-markdown/markdown.c' do
+    Rake::Task['submodule:init'].invoke
   end
+  task :exist => 'peg-markdown/markdown.c'
 end
 desc 'Gather required peg-markdown sources into extension directory'
-task :gather => 'submodule:update' do |t|
+task :gather => 'submodule:exist' do |t|
   sh 'cd peg-markdown && make markdown_parser.c'
-  cp FileList['peg-markdown/markdown_{peg.h,parser.c,output.c}'], 'ext/',
+  files =
+    FileList[
+      'peg-markdown/markdown_{peg.h,parser.c,output.c,lib.c,lib.h}',
+      'peg-markdown/{utility,parsing}_functions.c'
+    ]
+  cp files, 'ext/',
     :preserve => true,
     :verbose => true
 end
-CLOBBER.include 'ext/markdown_{peg.h,parser.c,output.c}'
 file 'ext/Makefile' => FileList['ext/{extconf.rb,*.c,*.h,*.rb}'] do
   chdir('ext') { ruby 'extconf.rb' }
@@ -73,24 +90,76 @@ end
 desc 'Build the peg-markdown extension'
 task :build => "lib/markdown.#{DLEXT}"
-task 'test:unit' => [ :build ] do |t|
-  ruby 'test.rb'
+desc 'Run unit and conformance tests'
+task :test => [ 'test:unit', 'test:conformance' ]
+desc 'Run unit tests'
+task 'test:unit' => [:build] do |t|
+  ruby 'test/markdown_test.rb'
 end
-task 'test:conformance' => [ 'submodule:update', :build ] do |t|
-  chdir('peg-markdown/MarkdownTest_1.0.3') do
-    sh "./MarkdownTest.pl --script=../../bin/rpeg-markdown --tidy"
+desc 'Run conformance tests (MARKDOWN_TEST_VER=1.0)'
+task 'test:conformance' => [:build] do |t|
+  script = "#{pwd}/bin/rpeg-markdown"
+  test_version = ENV['MARKDOWN_TEST_VER'] || '1.0'
+  chdir("test/MarkdownTest_#{test_version}") do
+    sh "./MarkdownTest.pl --script='#{script}' --tidy"
   end
 end
+desc 'Run version 1.0 conformance suite'
+task 'test:conformance:1.0' => 'test:conformance'
+desc 'Run 1.0.3 conformance suite'
+task 'test:conformance:1.0.3' => [:build] do |t|
+  ENV['MARKDOWN_TEST_VER'] = '1.0.3'
+  Rake::Task['test:conformance'].invoke
+end
+desc 'Run unit and conformance tests'
+task :test => %w[test:unit test:conformance]
+desc 'Run benchmarks'
+task :benchmark => :build do |t|
+  $:.unshift 'lib'
+  load 'test/benchmark.rb'
+end
+desc "See how much memory we're losing"
+task 'test:mem' => %w[submodule:exist build] do |t|
+  $: << File.join(File.dirname(__FILE__), "lib")
+  require 'markdown'
+  FileList['test.txt', 'peg-markdown/MarkdownTest_1.0.3/Tests/*.text'].each do |file|
+    printf "%s: \n", file
+    markdown = Markdown.new(File.read(file))
+    iterations = (ENV['N'] || 100).to_i
+    total, growth = [], []
+    iterations.times do |i|
+      start = Time.now
+      GC.start
+      markdown.to_html
+      duration = Time.now - start
+      GC.start
+      total << `ps -o rss= -p #{Process.pid}`.to_i
+      next if i == 0
+      growth << (total.last - (total[-2] || 0))
+      # puts "%03d: %06.02f ms / %dK used / %dK growth" % [ i, duration, total.last, growth.last ]
+    end
+    average = growth.inject(0) { |sum,x| sum + x } / growth.length
+    printf "  %dK avg growth (per run) / %dK used (after %d runs)\n", average, total.last, iterations
+  end
+end
 # ==========================================================
 # Rubyforge
 # ==========================================================
-task 'release' => [ "pkg/rpeg-markdown-#{VERS}.gem", "pkg/rpeg-markdown-#{VERS}.tar.gz" ] do |t|
-  #  "pkg/rpeg-markdown-#{VERS}.gem",
-  #  "pkg/rpeg-markdown-#{VERS}.tar.gz"
-  sh "rubyforge add_release wink rpeg-markdown #{VERS} pkg/rpeg-markdown-#{VERS}.gem"
+PKGNAME = "pkg/rpeg-markdown-#{VERS}"
+desc 'Publish new release to rubyforge'
+task :release => [ "#{PKGNAME}.gem", "#{PKGNAME}.tar.gz" ] do |t|
+  sh <<-end
+    rubyforge add_release wink rpeg-markdown #{VERS} #{PKGNAME}.gem &&
+    rubyforge add_file    wink rpeg-markdown #{VERS} #{PKGNAME}.tar.gz
+  end
 end

data/ext/extconf.h ADDED Viewed

@@ -0,0 +1,3 @@
+#ifndef EXTCONF_H
+#define EXTCONF_H
+#endif

data/ext/extconf.rb CHANGED Viewed

@@ -1,6 +1,17 @@
 require 'mkmf'
-$CFLAGS = "-Wall"
 dir_config('markdown')
+require 'pp'
+$objs = %w[markdown.o markdown_lib.o markdown_output.o markdown_parser.o]
+if pkg_config = find_executable('pkg-config')
+  $CFLAGS = `#{pkg_config} --cflags glib-2.0`
+  $LDFLAGS = `#{pkg_config} --libs glib-2.0`
+else
+  fail "glib2 not found"
+end
+create_header
 create_makefile('markdown')

data/ext/markdown.c CHANGED Viewed

@@ -1,6 +1,5 @@
 #include "ruby.h"
-#include "markdown_peg.h"
-#include "markdown_buffer.h"
+#include "markdown_lib.h"
 static VALUE rb_cMarkdown;
@@ -12,79 +11,36 @@ static ID id_notes;
 #define INCREMENT 4096  /* size of chunks in which to allocate memory */
 static VALUE
-markdown_to_html(VALUE self)
+rb_markdown_to_html(int argc, VALUE *argv, VALUE self)
 {
-  element parsed_input;
-  VALUE output_buffer;
-  char *inputbuf, *curchar;
-  int charstotab, buflength, maxlength;
-  /* grab char pointer to markdown input text */
-  VALUE text = rb_funcall(self, id_text, 0);
-  Check_Type(text, T_STRING);
-  char * ptext = StringValuePtr(text);
-  buflength = 0;
-  maxlength = RSTRING(text)->len >= INCREMENT ?
-    RSTRING(text)->len :
-    INCREMENT;
-  inputbuf = malloc(maxlength);
-  curchar = inputbuf;
-  charstotab = TABSTOP;
-  while ((*curchar = *ptext++) != '\0') {
-    switch (*curchar) {
-      case '\t':
-        while (charstotab > 0)
-          *curchar = ' ', curchar++, buflength++, charstotab--;
-        break;
-      case '\n':
-        curchar++, buflength++, charstotab = TABSTOP;
-        break;
-      default:
-        curchar++, buflength++, charstotab--;
-    }
-    if (charstotab == 0)
-      charstotab = TABSTOP;
-    if (buflength > maxlength - TABSTOP - 3) {
-      maxlength += INCREMENT;
-      inputbuf = realloc(inputbuf, maxlength);
-      curchar = inputbuf + buflength;
-      if (inputbuf == NULL) {
-        /* TODO: no memory */
-      }
-    }
-  }
-  *curchar++ = '\n';
-  *curchar++ = '\n';
-  *curchar   = '\0';
-  buflength+= 2;
-  /* flip extension bits */
-  int extensions = 0;
-  if ( rb_funcall(self, id_smart, 0) == Qtrue )
-    extensions = extensions | EXT_SMART ;
-  if ( rb_funcall(self, id_notes, 0) == Qtrue )
-    extensions = extensions | EXT_NOTES ;
-  /* parse markdown input into sematic element tree */
-  parsed_input = markdown(inputbuf, extensions);
-  /* allocate output buffer and generate output */
-  output_buffer = rb_markdown_buffer_init(buflength * 2);
-  print_element(parsed_input, HTML_FORMAT);
-  rb_markdown_buffer_free();
-  return output_buffer;
+    /* grab char pointer to markdown input text */
+    VALUE text = rb_funcall(self, id_text, 0);
+    Check_Type(text, T_STRING);
+    char * ptext = StringValuePtr(text);
+    /* flip extension bits */
+    int extensions = 0;
+    if ( rb_funcall(self, id_smart, 0) == Qtrue )
+        extensions = extensions | EXT_SMART ;
+    if ( rb_funcall(self, id_notes, 0) == Qtrue )
+        extensions = extensions | EXT_NOTES ;
+    char *html = markdown_to_string(ptext, extensions, HTML_FORMAT);
+    VALUE result = rb_str_new2(html);
+    free(html);
+    return result;
 }
 void Init_markdown()
 {
-  /* Initialize frequently used Symbols */
-  id_text = rb_intern("text");
-  id_smart = rb_intern("smart");
-  id_notes = rb_intern("notes");
+    /* Initialize frequently used Symbols */
+    id_text = rb_intern("text");
+    id_smart = rb_intern("smart");
+    id_notes = rb_intern("notes");
-  rb_cMarkdown = rb_define_class("Markdown", rb_cObject);
-  rb_define_method(rb_cMarkdown, "to_html", markdown_to_html, 0);
+    rb_cMarkdown = rb_define_class("Markdown", rb_cObject);
+    rb_define_method(rb_cMarkdown, "to_html", rb_markdown_to_html, -1);
 }
+// vim: ts=4 sw=4

data/ext/markdown_lib.c ADDED Viewed

@@ -0,0 +1,181 @@
+/**********************************************************************
+  markdown_lib.c - markdown in C using a PEG grammar.
+  (c) 2008 John MacFarlane (jgm at berkeley dot edu).
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+ ***********************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "markdown_peg.h"
+#define TABSTOP 4
+/* preformat_text - copy text into a newly allocated GString while
+ * performing tab expansion: each tab becomes enough spaces to reach the
+ * next TABSTOP-column boundary.  Two newlines are appended after the
+ * copied text.
+ *
+ * text - NUL-terminated input; it is read but not modified.
+ * Returns the new GString; the caller releases it with g_string_free(). */
+static GString *preformat_text(char *text) {
+    GString *buf;
+    char next_char;
+    int charstotab;    /* columns remaining until the next tab stop */
+    buf = g_string_new("");
+    charstotab = TABSTOP;
+    while ((next_char = *text++) != '\0') {
+        switch (next_char) {
+        case '\t':
+            /* pad with spaces up to the next tab stop */
+            while (charstotab > 0) {
+                g_string_append_c(buf, ' ');
+                charstotab--;
+            }
+            break;
+        case '\n':
+            /* a newline restarts the column count */
+            g_string_append_c(buf, '\n');
+            charstotab = TABSTOP;
+            break;
+        default:
+            g_string_append_c(buf, next_char);
+            charstotab--;
+        }
+        /* landed exactly on a tab stop: start counting the next one */
+        if (charstotab == 0)
+            charstotab = TABSTOP;
+    }
+    g_string_append(buf, "\n\n");
+    return buf;
+}
+/* print_tree - print tree of elements to stderr, for debugging only.
+ *
+ * elt    - first element of the list to print; siblings are followed
+ *          through ->next and children are printed recursively.
+ * indent - number of spaces written before each line at this level;
+ *          children are printed with indent + 4.
+ *
+ * Each line shows the element's address and key name; STR elements also
+ * show their string contents. */
+static void print_tree(element * elt, int indent) {
+    int i;
+    const char * key;    /* only ever points at string literals */
+    while (elt != NULL) {
+        for (i = 0; i < indent; i++)
+            fputc(' ', stderr);
+        switch (elt->key) {
+            case LIST:               key = "LIST"; break;
+            case RAW:                key = "RAW"; break;
+            case SPACE:              key = "SPACE"; break;
+            case LINEBREAK:          key = "LINEBREAK"; break;
+            case ELLIPSIS:           key = "ELLIPSIS"; break;
+            case EMDASH:             key = "EMDASH"; break;
+            case ENDASH:             key = "ENDASH"; break;
+            case APOSTROPHE:         key = "APOSTROPHE"; break;
+            case SINGLEQUOTED:       key = "SINGLEQUOTED"; break;
+            case DOUBLEQUOTED:       key = "DOUBLEQUOTED"; break;
+            case STR:                key = "STR"; break;
+            case LINK:               key = "LINK"; break;
+            case IMAGE:              key = "IMAGE"; break;
+            case CODE:               key = "CODE"; break;
+            case HTML:               key = "HTML"; break;
+            case EMPH:               key = "EMPH"; break;
+            case STRONG:             key = "STRONG"; break;
+            case PLAIN:              key = "PLAIN"; break;
+            case PARA:               key = "PARA"; break;
+            case LISTITEM:           key = "LISTITEM"; break;
+            case BULLETLIST:         key = "BULLETLIST"; break;
+            case ORDEREDLIST:        key = "ORDEREDLIST"; break;
+            case H1:                 key = "H1"; break;
+            case H2:                 key = "H2"; break;
+            case H3:                 key = "H3"; break;
+            case H4:                 key = "H4"; break;
+            case H5:                 key = "H5"; break;
+            case H6:                 key = "H6"; break;
+            case BLOCKQUOTE:         key = "BLOCKQUOTE"; break;
+            case VERBATIM:           key = "VERBATIM"; break;
+            case HTMLBLOCK:          key = "HTMLBLOCK"; break;
+            case HRULE:              key = "HRULE"; break;
+            case REFERENCE:          key = "REFERENCE"; break;
+            case NOTE:               key = "NOTE"; break;
+            default:                 key = "?";
+        }
+        /* %p with a void* cast prints the full pointer; casting to int
+         * would truncate it where int is narrower than a pointer. */
+        if ( elt->key == STR ) {
+            fprintf(stderr, "%p: %s   '%s'\n", (void *)elt, key, elt->contents.str);
+        } else {
+            fprintf(stderr, "%p: %s\n", (void *)elt, key);
+        }
+        if (elt->children)
+            print_tree(elt->children, indent + 4);
+        elt = elt->next;
+    }
+}
+/* process_raw_blocks - traverses an element list, replacing any RAW elements with
+ * the result of parsing them as markdown text, and recursing into the children
+ * of parent elements.  The result should be a tree of elements without any RAWs.
+ *
+ * input      - head of the element list to process; elements are modified in place.
+ * extensions, references, notes - passed through unchanged to parse_markdown().
+ * Returns input, the same head pointer that was passed in.
+ *
+ * NOTE(review): strtok() keeps hidden static state between calls, so the chunk
+ * loop relies on parse_markdown() not calling strtok() itself; confirm.
+ * NOTE(review): the chunk loop dereferences last_child without a NULL check, so
+ * it assumes the first parse_markdown() call returned a non-empty list whenever
+ * further chunks follow; confirm. */
+static element * process_raw_blocks(element *input, int extensions, element *references, element *notes) {
+    element *current = NULL;
+    element *last_child = NULL;
+    char *contents;
+    current = input;
+    while (current != NULL) {
+        if (current->key == RAW) {
+            /* \001 is used to indicate boundaries between nested lists when there
+             * is no blank line.  We split the string by \001 and parse
+             * each chunk separately. */
+            contents = strtok(current->contents.str, "\001");
+            current->key = LIST;  /* the RAW element becomes a LIST holding the parsed chunks */
+            current->children = parse_markdown(contents, extensions, references, notes);
+            last_child = current->children;
+            while ((contents = strtok(NULL, "\001"))) {
+                while (last_child->next != NULL)  /* walk to the current tail of the child list */
+                    last_child = last_child->next;
+                last_child->next = parse_markdown(contents, extensions, references, notes);
+            }
+            free(current->contents.str);  /* release the raw text now that every chunk is parsed */
+            current->contents.str = NULL;
+        }
+        if (current->children != NULL)  /* also reached for children just created from a RAW element */
+            current->children = process_raw_blocks(current->children, extensions, references, notes);
+        current = current->next;
+    }
+    return input;
+}
+/* markdown_to_g_string - convert markdown text to the output format specified.
+ * The text is tab-expanded, scanned for references and then for notes, parsed,
+ * has its RAW blocks resolved, and is finally printed into a new GString.
+ * Returns that GString, which must be freed after use using g_string_free().
+ *
+ * NOTE(review): the notes list is not passed to free_element_list() here;
+ * confirm whether it is released elsewhere. */
+GString * markdown_to_g_string(char *text, int extensions, int output_format) {
+    GString *expanded = preformat_text(text);
+    element *refs = parse_references(expanded->str, extensions);
+    element *note_list = parse_notes(expanded->str, extensions, refs);
+    element *tree = parse_markdown(expanded->str, extensions, refs, note_list);
+    GString *rendered;
+    tree = process_raw_blocks(tree, extensions, refs, note_list);
+    /* the expanded copy of the input is not used past this point */
+    g_string_free(expanded, TRUE);
+    rendered = g_string_new("");
+    print_element_list(rendered, tree, output_format, extensions);
+    free_element_list(tree);
+    free_element_list(refs);
+    return rendered;
+}
+/* markdown_to_string - convert markdown text to the output format specified.
+ * Returns a null-terminated string, which must be freed after use. */
+char * markdown_to_string(char *text, int extensions, int output_format) {
+    GString *rendered = markdown_to_g_string(text, extensions, output_format);
+    /* With FALSE, g_string_free() releases only the GString wrapper and
+     * returns the character buffer, which is handed on to the caller. */
+    return g_string_free(rendered, FALSE);
+}
+/* vim:set ts=4 sw=4: */

data/ext/markdown_lib.h ADDED Viewed

@@ -0,0 +1,19 @@
+/* markdown_lib.h - public interface for converting markdown text.
+ * The include guard prevents the enums below from being redefined if this
+ * header is included more than once in a translation unit. */
+#ifndef MARKDOWN_LIB_H
+#define MARKDOWN_LIB_H
+#include <stdlib.h>
+#include <stdio.h>
+#include <glib.h>
+/* Extension flags.  The values are distinct bits, so several flags can be
+ * combined with bitwise OR into the `extensions` argument. */
+enum markdown_extensions {
+    EXT_SMART            = 1,
+    EXT_NOTES            = 2
+};
+/* Output formats accepted as the `output_format` argument. */
+enum markdown_formats {
+    HTML_FORMAT,
+    LATEX_FORMAT,
+    GROFF_MM_FORMAT
+};
+/* Convert markdown text to the given output format.  Returns a GString,
+ * which must be freed after use using g_string_free(). */
+GString * markdown_to_g_string(char *text, int extensions, int output_format);
+/* Convert markdown text to the given output format.  Returns a
+ * null-terminated string, which must be freed after use. */
+char * markdown_to_string(char *text, int extensions, int output_format);
+#endif /* MARKDOWN_LIB_H */
+/* vim: set ts=4 sw=4 : */