nwodkram 0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,28 @@
1
+ # Nwodkram #
2
+
3
+ nwodkram (markdown in reverse) is a tool to reverse html into markdown.
4
+
5
+ ## Usage ##
6
+
7
+ The gem reopens class String and adds a to_markdown method.
8
+ When valid html is given, it returns markdown.
9
+ html = File.open('page.html', 'r') {|f| f.read }
10
+ markdown = html.to_markdown
11
+
12
+ ## Use case ##
13
+
14
+ My use case was to migrate a blog from wordpress.com to something more civilised (and self-hosted).
15
+
16
+ ## Supports ##
17
+
18
+ *tags*:
19
+ p, a, img, ul, ol, li, h1, h2, h3, em, strong
20
+
21
+ *rubies*:
22
+ tested with MRI ruby 1.8.7 and 1.9.2 (preview1)
23
+
24
+ ## TODO ##
25
+
26
+ The tool doesn't support the [full markdown syntax](http://daringfireball.net/projects/markdown/syntax).
27
+
28
+ If this tool gets used, I could extend it. For now, it's sufficient for my limited purposes.
data/Rakefile ADDED
@@ -0,0 +1,28 @@
1
+ require "rubygems"
2
+
3
+
4
# Running plain `rake` runs the specs.
task :default => :spec

# Directory containing this Rakefile; the option files are resolved against it.
root = File.dirname(__FILE__)

begin
  require 'spec/rake/spectask'
rescue LoadError
  # The spec rake task could not be loaded: still define :spec, but only
  # print a hint on stderr.
  desc "Run specs"
  task :spec do
    $stderr.puts '`gem install rspec` to run specs'
  end
else
  # Settings shared by the plain and the rcov spec tasks.
  configure_specs = lambda do |t|
    t.spec_opts  = ['--options', "\"#{root}/spec/spec.opts\""]
    t.spec_files = FileList['spec/*_spec.rb']
  end

  desc "Run specs"
  Spec::Rake::SpecTask.new { |t| configure_specs.call(t) }

  desc "Run all specs in spec directory with RCov"
  Spec::Rake::SpecTask.new(:rcov) do |t|
    configure_specs.call(t)
    t.rcov = true
    # Evaluated lazily: every line of spec/rcov.opts is split on spaces and
    # the pieces are returned as one flat list.
    t.rcov_opts = lambda do
      IO.readlines(root + "/spec/rcov.opts").map { |line| line.chomp.split " " }.flatten
    end
  end
end
data/lib/nwodkram.rb ADDED
@@ -0,0 +1,27 @@
1
+ require 'nokogiri'
2
+ require 'nwodkram_parser'
3
require 'stringio' # StringIO captures what the parser prints

# Converts an HTML string into Markdown by feeding it to a Nokogiri SAX
# parser driven by NwodkramParser and collecting everything that is printed
# while parsing.
class Nwodkram

  # The HTML source that #to_markdown converts.
  attr_accessor :html

  # html - the HTML String to convert.
  def initialize(html)
    @html = html
  end

  # Parses @html and returns, as a String, everything printed during the parse.
  #
  # NwodkramParser emits its output with `print`, so $stdout is pointed at an
  # in-memory buffer while parsing. $stdout is a global: anything else printed
  # during the parse ends up in the result as well.
  def to_markdown
    previous_stdout = $stdout
    buffer = StringIO.new
    $stdout = buffer
    Nokogiri::HTML::SAX::Parser.new(NwodkramParser.new).parse(@html)
    buffer.string
  ensure
    # Put back the stream that was active before the call (not the STDOUT
    # constant), so a redirect set up by the caller survives, also on error.
    $stdout = previous_stdout
  end

end
22
+
23
# Core extension: lets any String holding HTML convert itself to Markdown.
class String
  # Wraps the receiver in a Nwodkram instance and returns the result of its
  # to_markdown call.
  def to_markdown
    converter = Nwodkram.new(self)
    converter.to_markdown
  end
end
@@ -0,0 +1,69 @@
1
# SAX document handler that prints Markdown to $stdout while an HTML document
# is being parsed: opening tags, closing tags and text nodes are each turned
# into their Markdown counterpart as the events arrive.
class NwodkramParser < Nokogiri::XML::SAX::Document

  # attr - Hash of the attributes of the most recently opened element
  # name - name of the most recently opened element
  attr_accessor :attr, :name

  # tag name => [printed when the tag opens, printed when the tag closes].
  # An entry is either a String or a zero-argument lambda; lambdas are run
  # against the parser instance (see #local_value) so they can read @attr
  # and @parent.
  MARKDOWN = { 'p'      => ["", "\n"],
               'a'      => ["[", lambda { "](#{@attr['href']})" }],
               'img'    => [lambda { "![#{@attr['title'] || @attr['alt']}](#{@attr['src']})" }, ""],
               'li'     => [lambda { @parent == "ul" ? "* " : "1. " }, "\n"],
               'code'   => ["", ""],
               'h1'     => ["# ", " #\n"],
               'h2'     => ["## ", " ##\n"],
               'h3'     => ["### ", " ###\n"],
               'em'     => ["*", "*"],
               'strong' => ["**", "**"] }.freeze

  # Tags that decide which bullet an 'li' gets.
  LIST_TAGS = %w[ul ol].freeze

  # Called for every opening tag: remembers the tag and its attributes, then
  # prints the opening Markdown for tags listed in MARKDOWN.
  #
  # name       - tag name
  # attributes - attribute list, flat or as pairs (see #attr_hash)
  def start_element(name, attributes = [])
    @name, @attr = name, attr_hash(attributes)
    print local_value(MARKDOWN[name][0]) if MARKDOWN[name]
    track_parent
  end

  # Called for every closing tag: prints the closing Markdown for tags listed
  # in MARKDOWN and leaves the list that is being closed, if any.
  def end_element(name)
    print local_value(MARKDOWN[name][1]) if MARKDOWN[name]
    leave_list(name)
  end

  # content of the markup
  def characters(string)
    case @name
    when 'code'
      # 4 spaces to indicate code
      string.split("\n").each { |line| print "    #{line}\n" }
    when 'img'
      # image doesn't contain any children, normally: print nothing
    when 'p', 'h1', 'h2', 'h3', 'li', 'ul', 'ol'
      print string.chomp
    else
      # getting rid of annoying newlines: text that starts with a newline is
      # reduced to that newline. string[0, 1] yields a one-character String on
      # every Ruby version, unlike string[0], which is an Integer only on 1.8.
      print(string[0, 1] == "\n" ? "\n" : string)
    end
  end

  # CDATA sections are printed as they are.
  def cdata_block(string)
    print string
  end

  private

  # way to handle the start and end of elements - local evaluation if the
  # value is a proc. instance_exec passes no arguments to the block, which
  # matches the zero-argument lambdas in MARKDOWN; instance_eval would hand
  # them the receiver as an argument.
  def local_value(value)
    value.is_a?(Proc) ? instance_exec(&value) : value
  end

  # take attribute array and make it a hash
  # accepts the flat structure [key1, value1, key2, value2, ...] as well as
  # a list of pairs [[key1, value1], [key2, value2], ...]; a key without a
  # value maps to nil.
  def attr_hash(attributes)
    output = {}
    attributes.flatten.each_slice(2) { |key, value| output[key] = value }
    output
  end

  # for ul and ol, it's necessary to keep track of which is which.
  # Only list tags are recorded, so inline markup inside a list item (em,
  # strong, a, ...) no longer changes the bullet of the following items.
  def track_parent
    return unless LIST_TAGS.include?(@name)
    (@lists ||= []).push(@name)
    @parent = @name
  end

  # when a list closes, fall back to the list that encloses it (nil when
  # there is none), so items of an outer list keep their own bullet.
  def leave_list(name)
    return unless LIST_TAGS.include?(name)
    (@lists ||= []).pop
    @parent = @lists.last
  end

end
data/nwodkram.gemspec ADDED
@@ -0,0 +1,26 @@
1
# Gem specification for nwodkram: identity, packaged files and dependencies.
Gem::Specification.new do |spec|
  spec.name    = 'nwodkram'
  spec.version = '0.2'
  spec.date    = '2010-05-01'

  spec.summary     = 'Convert html into markdown'
  spec.description = 'Convert html into markdown'

  spec.authors = ['Elise Huard']
  spec.email   = 'nwodkram@elisehuard.be'

  spec.files = [
    'README.md',
    'lib/nwodkram_parser.rb',
    'lib/nwodkram.rb',
    'Rakefile',
    'nwodkram.gemspec',
    'spec/spec_helper.rb',
    'spec/spec.opts',
    'spec/rcov.opts',
    'spec/nwodkram_spec.rb'
  ]
  # The test files are the packaged paths that match the spec pattern.
  spec.test_files = spec.files.grep(/^spec\/.*_spec.rb/)

  spec.require_paths = ['lib']
  spec.add_dependency 'nokogiri', '>= 1.4.1'
end
@@ -0,0 +1,75 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/spec_helper")
2
+ require 'stringio'
3
+
4
+ # Issue: BlueCloth surrounds every bit of html with a <p> element
5
describe "converting html to markdown" do

  # [example description, markdown fixture] pairs. Every fixture is converted
  # to html with String#to_html and has to come back unchanged from
  # String#to_markdown.
  round_trips = [
    ["should not convert simple text",   "hello hello\n"],
    ["should convert emphasis",          "*NO* smoking\n"],
    ["should convert strong",            "I was **born** under a wandering star\n"],
    ["should convert a link",            "my favourite site [test](http://test.com) because it rules\n"],
    ["should convert an image",          "![title](http://test.com/lalala.jpg)\n"],
    ["should convert an unordered list", "* test\n* test2\n"],
    ["should convert an ordered list",   "1. test\n1. test2\n"],
    # Markdown only treats lines indented by four spaces as a code block.
    # NOTE(review): the extra two spaces in front of `p today` are assumed -
    # confirm against the original fixture.
    ["should convert a pre",             "    def test\n      p today\n    end\n"],
    ["should convert a h1 title",        "# title #\n"],
    ["should convert a h3 title",        "### title ###\n"]
  ]

  round_trips.each do |description, markdown|
    it description do
      html = markdown.to_html
      html.to_markdown.should == markdown
    end
  end

  # the ultimate test: convert the readme
  it "should convert a whole text" do
    readme = File.expand_path(File.dirname(__FILE__) + "/../README.md")
    # block form, so the file handle is closed again after reading
    markdown = File.open(readme) { |file| file.read }
    html = markdown.to_html
    html.to_markdown.should == markdown
  end

end
75
+
data/spec/rcov.opts ADDED
@@ -0,0 +1 @@
1
+ -x gems,spec
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --diff
2
+ --color
@@ -0,0 +1,25 @@
1
+ dir = File.dirname(__FILE__)
2
+ $LOAD_PATH.unshift "#{dir}/../lib"
3
+
4
+ require 'rubygems'
5
+ require 'spec'
6
+ require 'nwodkram'
7
+ require 'bluecloth' # markdown converter to test against
8
+ require 'rspec/matchers'
9
+
10
+ Spec::Runner.configure do |config|
11
+ end
12
+
13
# Spec-only core extension: renders a markdown String to html, giving the
# specs html input to convert back.
class String
  # Hands the receiver to BlueCloth and returns the result of its to_html.
  def to_html
    BlueCloth.new(self).to_html
  end
end
19
+
20
# Custom matcher: passes when the markdown under test, converted with
# String#to_html and then String#to_markdown, equals the original markdown.
Rspec::Matchers.define :convert_to_html_and_back do
  match do |markdown|
    markdown.to_html.to_markdown == markdown
  end
end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nwodkram
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 2
8
+ version: "0.2"
9
+ platform: ruby
10
+ authors:
11
+ - Elise Huard
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+
16
+ date: 2010-05-01 00:00:00 +02:00
17
+ default_executable:
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: nokogiri
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ segments:
27
+ - 1
28
+ - 4
29
+ - 1
30
+ version: 1.4.1
31
+ type: :runtime
32
+ version_requirements: *id001
33
+ description: Convert html into markdown
34
+ email: nwodkram@elisehuard.be
35
+ executables: []
36
+
37
+ extensions: []
38
+
39
+ extra_rdoc_files: []
40
+
41
+ files:
42
+ - README.md
43
+ - lib/nwodkram_parser.rb
44
+ - lib/nwodkram.rb
45
+ - Rakefile
46
+ - nwodkram.gemspec
47
+ - spec/spec_helper.rb
48
+ - spec/spec.opts
49
+ - spec/rcov.opts
50
+ - spec/nwodkram_spec.rb
51
+ has_rdoc: true
52
+ homepage:
53
+ licenses: []
54
+
55
+ post_install_message:
56
+ rdoc_options: []
57
+
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ requirements: []
75
+
76
+ rubyforge_project:
77
+ rubygems_version: 1.3.6
78
+ signing_key:
79
+ specification_version: 3
80
+ summary: Convert html into markdown
81
+ test_files:
82
+ - spec/nwodkram_spec.rb