RubyGems - nwodkram - Versions diffs - 0.2 - Mend

Files changed (10) hide show

data/README.md ADDED Viewed

@@ -0,0 +1,28 @@
+# Nwodkram #
+nwodkram (markdown in reverse) is a tool to reverse html into markdown.
+## Usage ##
+The gem reopens class String and adds a to_markdown method.
+When valid html is given, it returns markdown.
+    html = File.open('page.html', 'r') {|f| f.read }
+    markdown = html.to_markdown
+## Use case ##
+My use case was to migrate a blog from wordpress.com to something more civilised (and self-hosted).
+## Supports ##
+*tags*:
+    p, a, img, ul, ol, li, h1, h2, h3, em, strong
+*rubies*:
+tested with MRI ruby 1.8.7 and 1.9.2 (preview1)
+## TODO ##
+The tool doesn't support the [full markdown syntax](http://daringfireball.net/projects/markdown/syntax).
+If this tool gets used, I could extend it.  For now, it's sufficient for my limited purposes.

data/Rakefile ADDED Viewed

@@ -0,0 +1,28 @@
+require "rubygems"
+task :default => :spec
+begin
+  require 'spec/rake/spectask'
+rescue LoadError
+  desc "Run specs"
+  task(:spec) { $stderr.puts '`gem install rspec` to run specs' }
+else
+  desc "Run specs"
+  Spec::Rake::SpecTask.new do |t|
+    t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
+    t.spec_files = FileList['spec/*_spec.rb']
+  end
+  desc "Run all specs in spec directory with RCov"
+  Spec::Rake::SpecTask.new(:rcov) do |t|
+    t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
+    t.spec_files = FileList['spec/*_spec.rb']
+    t.rcov = true
+    t.rcov_opts = lambda do
+      IO.readlines(File.dirname(__FILE__) + "/spec/rcov.opts").map {|l| l.chomp.split " "}.flatten
+    end
+  end
+end

data/lib/nwodkram.rb ADDED Viewed

@@ -0,0 +1,27 @@
+require 'stringio'
+require 'nokogiri'
+require 'nwodkram_parser'
+class Nwodkram
+  attr_accessor :html
+  def initialize(html)
+    @html = html
+  end
+  def to_markdown
+    parser = Nokogiri::HTML::SAX::Parser.new(NwodkramParser.new)
+    out = StringIO.new
+    $stdout = out
+    parser.parse(@html)
+    out.string
+  ensure
+    $stdout = STDOUT
+  end
+end
+class String
+  def to_markdown
+    Nwodkram.new(self).to_markdown
+  end
+end

data/lib/nwodkram_parser.rb ADDED Viewed

@@ -0,0 +1,69 @@
+class NwodkramParser < Nokogiri::XML::SAX::Document
+  attr_accessor :attr, :name
+  MARKDOWN = { 'p'      => [""                                                               , "\n"],
+               'a'      => ["["                                                              , lambda { "](#{@attr['href']})" }],
+               'img'    => [ lambda {"![#{@attr['title'] || @attr['alt']}](#{@attr['src']})"}, ""],
+               'li'     => [ lambda { @parent == "ul" ? "* " : "1. " }                       , "\n"],
+               'code'   => [""                                                               , ""],
+               'h1'     => ["# "                                                             , " #\n" ],
+               'h2'     => ["## "                                                            , " ##\n" ],
+               'h3'     => ["### "                                                           , " ###\n"],
+               'em'     => ["*"                                                              , "*"],
+               'strong' => ["**"                                                             , "**"] }
+  def start_element(name,attributes = [])
+    @name, @attr = name, attr_hash(attributes)
+    MARKDOWN[name] ? print(local_value(MARKDOWN[name][0])) : print("")
+    track_parent
+  end
+  def end_element(name)
+    MARKDOWN[name] ? print(local_value(MARKDOWN[name][1])) : print("")
+  end
+  # content of the markup
+  def characters(string)
+    case @name
+    when "code"
+      string.split("\n").each {|line|
+        print "    #{line}\n" # 4 spaces to indicate code
+      }
+    when "img"
+      print "" # image doesn't contain any children, normally
+    when 'p','h1','h2','h3', 'li', 'ul','ol'
+      print string.chomp
+    else
+      # getting rid of annoying newlines
+      string[0] == 10 ? print("\n") : print(string)
+    end
+  end
+  def cdata_block(string)
+    print string
+  end
+private
+  # way to handle the start and end of elements - local evaluation if the value is a proc
+  def local_value(value)
+    value.is_a?(Proc) ? instance_eval(&value) : value
+  end
+  # take attribute array and make it a hash
+  # structure of attributes [key1, value2, key2, value2,...]
+  def attr_hash(attributes)
+    output = {}
+    attributes.each_with_index do |attr,i|
+      output[attr] = attributes[i+1] if !i.odd? and i << (attributes.size-2)
+    end
+    output
+  end
+  # for ul and ol, it's necessary to keep track of which is which
+  def track_parent
+    @parent = @name unless @name == 'li'
+  end
+end

data/nwodkram.gemspec ADDED Viewed

@@ -0,0 +1,26 @@
+Gem::Specification.new do |s|
+  s.name = 'nwodkram'
+  s.version = '0.2'
+  s.date = '2010-05-01'
+  s.description = 'Convert html into markdown'
+  s.summary = 'Convert html into markdown'
+  s.authors = ['Elise Huard']
+  s.email = 'nwodkram@elisehuard.be'
+  s.files = %w[
+              README.md
+              lib/nwodkram_parser.rb
+              lib/nwodkram.rb
+              Rakefile
+              nwodkram.gemspec
+              spec/spec_helper.rb
+              spec/spec.opts
+              spec/rcov.opts
+              spec/nwodkram_spec.rb
+             ]
+  s.test_files = s.files.select {|path| path =~ /^spec\/.*_spec.rb/ }
+  s.add_dependency 'nokogiri', '>= 1.4.1'
+  s.require_paths = %w[lib]
+end

data/spec/nwodkram_spec.rb ADDED Viewed

@@ -0,0 +1,75 @@
+require File.expand_path(File.dirname(__FILE__) + "/spec_helper")
+require 'stringio'
+# Issue: BlueCloth surrounds every bit of html with a <p> element
+describe "converting html to markdown" do
+  it "should not convert simple text" do
+    markdown = "hello hello\n"
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+  it "should convert emphasis" do
+    markdown = "*NO* smoking\n"
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+  it "should convert strong" do
+    markdown = "I was **born** under a wandering star\n"
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+  it "should convert a link" do
+    markdown = "my favourite site [test](http://test.com) because it rules\n"
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+  it "should convert an image" do
+    markdown = "![title](http://test.com/lalala.jpg)\n"
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+  it "should convert an unordered list" do
+    markdown = "* test\n* test2\n"
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+  it "should convert an ordered list" do
+    markdown = "1. test\n1. test2\n"
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+  it "should convert a pre" do
+    markdown = "    def test\n      p today\n    end\n"
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+  it "should convert a h1 title" do
+    markdown = "# title #\n"
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+  it "should convert a h3 title" do
+    markdown = "### title ###\n"
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+  # the ultimate test: convert the readme
+  it "should convert a whole text" do
+    markdown = File.open(File.expand_path(File.dirname(__FILE__) + "/../README.md")).read
+    html = markdown.to_html
+    html.to_markdown.should == markdown
+  end
+end

data/spec/rcov.opts ADDED Viewed

	@@ -0,0 +1 @@
1	+ -x gems,spec

data/spec/spec.opts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ --diff
2	+ --color

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,25 @@
+# Put the lib/ directory that sits next to this spec directory at the front of
+# the load path.
+dir = File.dirname(__FILE__)
+$LOAD_PATH.unshift(File.join(dir, '..', 'lib'))
+require 'rubygems'
+require 'spec'
+require 'nwodkram'
+require 'bluecloth' # markdown converter to test against
+require 'rspec/matchers'
+# Runner configuration hook; the block is empty, so nothing is customised here.
+Spec::Runner.configure do |config|
+end
+class String
+  def to_html
+    bc = BlueCloth.new(self)
+    bc.to_html
+  end
+end
+Rspec::Matchers.define :convert_to_html_and_back do
+  match do |markdown|
+    html = markdown.to_html
+    html.to_markdown == markdown
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,82 @@
+--- !ruby/object:Gem::Specification
+name: nwodkram
+version: !ruby/object:Gem::Version
+  prerelease: false
+  segments:
+  - 0
+  - 2
+  version: "0.2"
+platform: ruby
+authors:
+- Elise Huard
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2010-05-01 00:00:00 +02:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        segments:
+        - 1
+        - 4
+        - 1
+        version: 1.4.1
+  type: :runtime
+  version_requirements: *id001
+description: Convert html into markdown
+email: nwodkram@elisehuard.be
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- README.md
+- lib/nwodkram_parser.rb
+- lib/nwodkram.rb
+- Rakefile
+- nwodkram.gemspec
+- spec/spec_helper.rb
+- spec/spec.opts
+- spec/rcov.opts
+- spec/nwodkram_spec.rb
+has_rdoc: true
+homepage:
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.3.6
+signing_key:
+specification_version: 3
+summary: Convert html into markdown
+test_files:
+- spec/nwodkram_spec.rb

nwodkram 0.2