nwodkram 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +28 -0
- data/Rakefile +28 -0
- data/lib/nwodkram.rb +27 -0
- data/lib/nwodkram_parser.rb +69 -0
- data/nwodkram.gemspec +26 -0
- data/spec/nwodkram_spec.rb +75 -0
- data/spec/rcov.opts +1 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +25 -0
- metadata +82 -0
data/README.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Nwodkram #
|
2
|
+
|
3
|
+
nwodkram (markdown in reverse) is a tool to reverse html into markdown.
|
4
|
+
|
5
|
+
## Usage ##
|
6
|
+
|
7
|
+
The gem reopens class String and adds a to_markdown method.
|
8
|
+
When valid html is given, it returns markdown.
|
9
|
+
html = File.open('page.html', 'r') {|f| f.read }
|
10
|
+
markdown = html.to_markdown
|
11
|
+
|
12
|
+
## Use case ##
|
13
|
+
|
14
|
+
My use case was to migrate a blog from wordpress.com to something more civilised (and self-hosted).
|
15
|
+
|
16
|
+
## Supports ##
|
17
|
+
|
18
|
+
*tags*:
|
19
|
+
p, a, img, ul, ol, li, h1, h2, h3, em, strong
|
20
|
+
|
21
|
+
*rubies*:
|
22
|
+
tested with MRI ruby 1.8.7 and 1.9.2 (preview1)
|
23
|
+
|
24
|
+
## TODO ##
|
25
|
+
|
26
|
+
The tool doesn't support the [full markdown syntax](http://daringfireball.net/projects/markdown/syntax).
|
27
|
+
|
28
|
+
If this tool gets used, I could extend it. For now, it's sufficient for my limited purposes.
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
|
3
|
+
|
4
|
+
task :default => :spec
|
5
|
+
|
6
|
+
begin
|
7
|
+
require 'spec/rake/spectask'
|
8
|
+
rescue LoadError
|
9
|
+
desc "Run specs"
|
10
|
+
task(:spec) { $stderr.puts '`gem install rspec` to run specs' }
|
11
|
+
else
|
12
|
+
desc "Run specs"
|
13
|
+
Spec::Rake::SpecTask.new do |t|
|
14
|
+
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
15
|
+
t.spec_files = FileList['spec/*_spec.rb']
|
16
|
+
end
|
17
|
+
|
18
|
+
desc "Run all specs in spec directory with RCov"
|
19
|
+
Spec::Rake::SpecTask.new(:rcov) do |t|
|
20
|
+
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
21
|
+
t.spec_files = FileList['spec/*_spec.rb']
|
22
|
+
t.rcov = true
|
23
|
+
t.rcov_opts = lambda do
|
24
|
+
IO.readlines(File.dirname(__FILE__) + "/spec/rcov.opts").map {|l| l.chomp.split " "}.flatten
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
data/lib/nwodkram.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'nwodkram_parser'
|
3
|
+
# Converts an HTML string into Markdown.
#
# The conversion is driven by Nokogiri's SAX HTML parser with a
# NwodkramParser as its document handler. The handler writes with
# Kernel#print, so the output is collected by temporarily pointing
# $stdout at a StringIO.
class Nwodkram

  # The HTML source that #to_markdown converts.
  attr_accessor :html

  # html - String containing the HTML markup to convert.
  def initialize(html)
    @html = html
  end

  # Parses @html and returns everything the handler printed as a String.
  #
  # NOTE: $stdout is a process-wide global, so anything else printed while
  # the parse is running ends up in the returned string as well.
  def to_markdown
    # Remember the stream that is active right now: the caller may already
    # have redirected $stdout, and resetting to the STDOUT constant would
    # silently discard that redirection.
    previous_stdout = $stdout
    parser = Nokogiri::HTML::SAX::Parser.new(NwodkramParser.new)
    out = StringIO.new
    $stdout = out
    parser.parse(@html)
    out.string
  ensure
    $stdout = previous_stdout
  end

end
|
22
|
+
|
23
|
+
class String
|
24
|
+
def to_markdown
|
25
|
+
Nwodkram.new(self).to_markdown
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
class NwodkramParser < Nokogiri::XML::SAX::Document
|
2
|
+
|
3
|
+
attr_accessor :attr, :name
|
4
|
+
|
5
|
+
MARKDOWN = { 'p' => ["" , "\n"],
|
6
|
+
'a' => ["[" , lambda { "](#{@attr['href']})" }],
|
7
|
+
'img' => [ lambda {"![#{@attr['title'] || @attr['alt']}](#{@attr['src']})"}, ""],
|
8
|
+
'li' => [ lambda { @parent == "ul" ? "* " : "1. " } , "\n"],
|
9
|
+
'code' => ["" , ""],
|
10
|
+
'h1' => ["# " , " #\n" ],
|
11
|
+
'h2' => ["## " , " ##\n" ],
|
12
|
+
'h3' => ["### " , " ###\n"],
|
13
|
+
'em' => ["*" , "*"],
|
14
|
+
'strong' => ["**" , "**"] }
|
15
|
+
|
16
|
+
def start_element(name,attributes = [])
|
17
|
+
@name, @attr = name, attr_hash(attributes)
|
18
|
+
MARKDOWN[name] ? print(local_value(MARKDOWN[name][0])) : print("")
|
19
|
+
track_parent
|
20
|
+
end
|
21
|
+
|
22
|
+
def end_element(name)
|
23
|
+
MARKDOWN[name] ? print(local_value(MARKDOWN[name][1])) : print("")
|
24
|
+
end
|
25
|
+
|
26
|
+
# content of the markup
#
# Prints the text found inside an element. How the text is written depends
# on @name, the name of the element that was opened most recently.
#
# string - the String of character data to print.
def characters(string)
  case @name
  when 'code'
    # 4 spaces to indicate code
    string.split("\n").each { |line| print "    #{line}\n" }
  when 'img'
    # image doesn't contain any children, normally - nothing to print
  when 'p', 'h1', 'h2', 'h3', 'li', 'ul', 'ol'
    print string.chomp
  else
    # getting rid of annoying newlines: text that begins with a line break
    # is reduced to a single "\n". The first character is compared as a
    # String because String#[] only returns a byte value (10) on Ruby 1.8.
    print(string[0, 1] == "\n" ? "\n" : string)
  end
end
|
42
|
+
|
43
|
+
def cdata_block(string)
|
44
|
+
print string
|
45
|
+
end
|
46
|
+
|
47
|
+
private
|
48
|
+
# way to handle the start and end of elements - local evaluation if the value is a proc
#
# value - a literal String, or a Proc that builds the String.
#
# Returns the value unchanged, or the result of running the Proc with self
# set to this object so that it can read instance variables such as @attr.
def local_value(value)
  # instance_exec, not instance_eval: instance_eval hands the receiver to the
  # block as an argument, which a zero-arity lambda rejects with ArgumentError.
  value.is_a?(Proc) ? instance_exec(&value) : value
end
|
52
|
+
|
53
|
+
|
54
|
+
# take attribute array and make it a hash
#
# attributes - the attributes of an element, either as a flat list
#              [key1, value1, key2, value2, ...] or as a list of pairs
#              [[key1, value1], [key2, value2], ...].
#
# Returns a Hash mapping each attribute name to its value. A trailing name
# without a value maps to nil.
def attr_hash(attributes)
  output = {}
  # flatten turns the list of pairs into the flat form and leaves an already
  # flat list untouched, so both shapes can be walked two items at a time
  attributes.flatten.each_slice(2) { |key, value| output[key] = value }
  output
end
|
63
|
+
|
64
|
+
# for ul and ol, it's necessary to keep track of which is which
#
# Stores the most recently opened list container in @parent, which is what
# decides whether an li is written as "* " or "1. ". Every other element is
# ignored, so an inline tag inside a list item (a, em, ...) no longer makes
# the following items forget which kind of list they belong to.
def track_parent
  @parent = @name if @name == 'ul' || @name == 'ol'
end
|
68
|
+
|
69
|
+
end
|
data/nwodkram.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = 'nwodkram'
|
3
|
+
s.version = '0.2'
|
4
|
+
s.date = '2010-05-01'
|
5
|
+
|
6
|
+
s.description = 'Convert html into markdown'
|
7
|
+
s.summary = 'Convert html into markdown'
|
8
|
+
|
9
|
+
s.authors = ['Elise Huard']
|
10
|
+
s.email = 'nwodkram@elisehuard.be'
|
11
|
+
|
12
|
+
s.files = %w[
|
13
|
+
README.md
|
14
|
+
lib/nwodkram_parser.rb
|
15
|
+
lib/nwodkram.rb
|
16
|
+
Rakefile
|
17
|
+
nwodkram.gemspec
|
18
|
+
spec/spec_helper.rb
|
19
|
+
spec/spec.opts
|
20
|
+
spec/rcov.opts
|
21
|
+
spec/nwodkram_spec.rb
|
22
|
+
]
|
23
|
+
s.test_files = s.files.select {|path| path =~ /^spec\/.*_spec.rb/ }
|
24
|
+
s.add_dependency 'nokogiri', '>= 1.4.1'
|
25
|
+
s.require_paths = %w[lib]
|
26
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/spec_helper")
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
# Issue: BlueCloth surrounds every bit of html with a p (paragraph) tag
|
5
|
+
describe "converting html to markdown" do
|
6
|
+
|
7
|
+
it "should not convert simple text" do
|
8
|
+
markdown = "hello hello\n"
|
9
|
+
html = markdown.to_html
|
10
|
+
html.to_markdown.should == markdown
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should convert emphasis" do
|
14
|
+
markdown = "*NO* smoking\n"
|
15
|
+
html = markdown.to_html
|
16
|
+
html.to_markdown.should == markdown
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should convert strong" do
|
20
|
+
markdown = "I was **born** under a wandering star\n"
|
21
|
+
html = markdown.to_html
|
22
|
+
html.to_markdown.should == markdown
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should convert a link" do
|
26
|
+
markdown = "my favourite site [test](http://test.com) because it rules\n"
|
27
|
+
html = markdown.to_html
|
28
|
+
html.to_markdown.should == markdown
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should convert an image" do
|
32
|
+
markdown = "![title](http://test.com/lalala.jpg)\n"
|
33
|
+
html = markdown.to_html
|
34
|
+
html.to_markdown.should == markdown
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should convert an unordered list" do
|
38
|
+
markdown = "* test\n* test2\n"
|
39
|
+
html = markdown.to_html
|
40
|
+
html.to_markdown.should == markdown
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should convert an ordered list" do
|
44
|
+
markdown = "1. test\n1. test2\n"
|
45
|
+
html = markdown.to_html
|
46
|
+
html.to_markdown.should == markdown
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should convert a pre" do
|
50
|
+
markdown = " def test\n p today\n end\n"
|
51
|
+
html = markdown.to_html
|
52
|
+
html.to_markdown.should == markdown
|
53
|
+
end
|
54
|
+
|
55
|
+
it "should convert a h1 title" do
|
56
|
+
markdown = "# title #\n"
|
57
|
+
html = markdown.to_html
|
58
|
+
html.to_markdown.should == markdown
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should convert a h3 title" do
|
62
|
+
markdown = "### title ###\n"
|
63
|
+
html = markdown.to_html
|
64
|
+
html.to_markdown.should == markdown
|
65
|
+
end
|
66
|
+
|
67
|
+
# the ultimate test: convert the readme
|
68
|
+
it "should convert a whole text" do
|
69
|
+
markdown = File.open(File.expand_path(File.dirname(__FILE__) + "/../README.md")).read
|
70
|
+
html = markdown.to_html
|
71
|
+
html.to_markdown.should == markdown
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
75
|
+
|
data/spec/rcov.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
-x gems,spec
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
dir = File.dirname(__FILE__)
|
2
|
+
$LOAD_PATH.unshift "#{dir}/../lib"
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
require 'spec'
|
6
|
+
require 'nwodkram'
|
7
|
+
require 'bluecloth' # markdown converter to test against
|
8
|
+
require 'rspec/matchers'
|
9
|
+
|
10
|
+
Spec::Runner.configure do |config|
|
11
|
+
end
|
12
|
+
|
13
|
+
class String
|
14
|
+
def to_html
|
15
|
+
bc = BlueCloth.new(self)
|
16
|
+
bc.to_html
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
Rspec::Matchers.define :convert_to_html_and_back do
|
21
|
+
match do |markdown|
|
22
|
+
html = markdown.to_html
|
23
|
+
html.to_markdown == markdown
|
24
|
+
end
|
25
|
+
end
|
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nwodkram
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 2
|
8
|
+
version: "0.2"
|
9
|
+
platform: ruby
|
10
|
+
authors:
|
11
|
+
- Elise Huard
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain: []
|
15
|
+
|
16
|
+
date: 2010-05-01 00:00:00 +02:00
|
17
|
+
default_executable:
|
18
|
+
dependencies:
|
19
|
+
- !ruby/object:Gem::Dependency
|
20
|
+
name: nokogiri
|
21
|
+
prerelease: false
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
segments:
|
27
|
+
- 1
|
28
|
+
- 4
|
29
|
+
- 1
|
30
|
+
version: 1.4.1
|
31
|
+
type: :runtime
|
32
|
+
version_requirements: *id001
|
33
|
+
description: Convert html into markdown
|
34
|
+
email: nwodkram@elisehuard.be
|
35
|
+
executables: []
|
36
|
+
|
37
|
+
extensions: []
|
38
|
+
|
39
|
+
extra_rdoc_files: []
|
40
|
+
|
41
|
+
files:
|
42
|
+
- README.md
|
43
|
+
- lib/nwodkram_parser.rb
|
44
|
+
- lib/nwodkram.rb
|
45
|
+
- Rakefile
|
46
|
+
- nwodkram.gemspec
|
47
|
+
- spec/spec_helper.rb
|
48
|
+
- spec/spec.opts
|
49
|
+
- spec/rcov.opts
|
50
|
+
- spec/nwodkram_spec.rb
|
51
|
+
has_rdoc: true
|
52
|
+
homepage:
|
53
|
+
licenses: []
|
54
|
+
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
segments:
|
65
|
+
- 0
|
66
|
+
version: "0"
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
requirements: []
|
75
|
+
|
76
|
+
rubyforge_project:
|
77
|
+
rubygems_version: 1.3.6
|
78
|
+
signing_key:
|
79
|
+
specification_version: 3
|
80
|
+
summary: Convert html into markdown
|
81
|
+
test_files:
|
82
|
+
- spec/nwodkram_spec.rb
|