nwodkram 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +28 -0
- data/Rakefile +28 -0
- data/lib/nwodkram.rb +27 -0
- data/lib/nwodkram_parser.rb +69 -0
- data/nwodkram.gemspec +26 -0
- data/spec/nwodkram_spec.rb +75 -0
- data/spec/rcov.opts +1 -0
- data/spec/spec.opts +2 -0
- data/spec/spec_helper.rb +25 -0
- metadata +82 -0
data/README.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Nwodkram #
|
2
|
+
|
3
|
+
nwodkram (markdown in reverse) is a tool to reverse html into markdown.
|
4
|
+
|
5
|
+
## Usage ##
|
6
|
+
|
7
|
+
The gem reopens class String and adds a to_markdown method.
|
8
|
+
When valid html is given, it returns markdown.
|
9
|
+
html = File.open('page.html', 'r') {|f| f.read }
|
10
|
+
markdown = html.to_markdown
|
11
|
+
|
12
|
+
## Use case ##
|
13
|
+
|
14
|
+
My use case was to migrate a blog from wordpress.com to something more civilised (and self-hosted).
|
15
|
+
|
16
|
+
## Supports ##
|
17
|
+
|
18
|
+
*tags*:
|
19
|
+
p, a, img, ul, ol, li, h1, h2, h3, em, strong
|
20
|
+
|
21
|
+
*rubies*:
|
22
|
+
tested with MRI ruby 1.8.7 and 1.9.2 (preview1)
|
23
|
+
|
24
|
+
## TODO ##
|
25
|
+
|
26
|
+
The tool doesn't support the [full markdown syntax](http://daringfireball.net/projects/markdown/syntax).
|
27
|
+
|
28
|
+
If this tool gets used, I could extend it. For now, it's sufficient for my limited purposes.
|
data/Rakefile
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require "rubygems"
|
2
|
+
|
3
|
+
|
4
|
+
task :default => :spec
|
5
|
+
|
6
|
+
begin
|
7
|
+
require 'spec/rake/spectask'
|
8
|
+
rescue LoadError
|
9
|
+
desc "Run specs"
|
10
|
+
task(:spec) { $stderr.puts '`gem install rspec` to run specs' }
|
11
|
+
else
|
12
|
+
desc "Run specs"
|
13
|
+
Spec::Rake::SpecTask.new do |t|
|
14
|
+
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
15
|
+
t.spec_files = FileList['spec/*_spec.rb']
|
16
|
+
end
|
17
|
+
|
18
|
+
desc "Run all specs in spec directory with RCov"
|
19
|
+
Spec::Rake::SpecTask.new(:rcov) do |t|
|
20
|
+
t.spec_opts = ['--options', "\"#{File.dirname(__FILE__)}/spec/spec.opts\""]
|
21
|
+
t.spec_files = FileList['spec/*_spec.rb']
|
22
|
+
t.rcov = true
|
23
|
+
t.rcov_opts = lambda do
|
24
|
+
IO.readlines(File.dirname(__FILE__) + "/spec/rcov.opts").map {|l| l.chomp.split " "}.flatten
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
data/lib/nwodkram.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'nwodkram_parser'
|
3
|
+
# Converts an HTML string into markdown by feeding it to Nokogiri's HTML SAX
# parser with a NwodkramParser handler, which prints markdown fragments to
# $stdout as parse events arrive.
class Nwodkram

  attr_accessor :html

  # html - String holding the HTML document or fragment to convert.
  def initialize(html)
    @html = html
  end

  # Parses @html and returns the markdown the handler printed, as a String.
  #
  # The handler writes with Kernel#print, so $stdout is pointed at an
  # in-memory buffer for the duration of the parse. The stream that was
  # active on entry is put back afterwards (also when parsing raises), so a
  # caller that had already redirected $stdout keeps its redirection instead
  # of being reset to the process-level STDOUT.
  def to_markdown
    # Captured first so the ensure clause always has a valid stream to restore.
    previous_stdout = $stdout
    parser = Nokogiri::HTML::SAX::Parser.new(NwodkramParser.new)
    out = StringIO.new
    $stdout = out
    parser.parse(@html)
    out.string
  ensure
    $stdout = previous_stdout
  end

end
|
22
|
+
|
23
|
+
# Reopens String so that any string holding HTML can be converted directly.
class String
  # Returns the markdown rendering of this string, treated as HTML, by
  # delegating to Nwodkram.
  def to_markdown
    converter = Nwodkram.new(self)
    converter.to_markdown
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# SAX handler that turns a stream of HTML parse events into markdown. Each
# fragment is written with Kernel#print, i.e. to whatever $stdout currently is.
class NwodkramParser < Nokogiri::XML::SAX::Document

  attr_accessor :attr, :name

  # Tags that open a list; used to decide which marker an <li> receives.
  LIST_TAGS = %w[ul ol].freeze

  # tag name => [text printed when the tag opens, text printed when it closes].
  # An entry may be a lambda instead of a String; it is then run in the
  # context of the parser instance (see #local_value) so that it can read
  # @attr and @parent.
  MARKDOWN = { 'p'      => ["", "\n"],
               'a'      => ["[", lambda { "](#{@attr['href']})" }],
               'img'    => [lambda { "![#{@attr['title'] || @attr['alt']}](#{@attr['src']})" }, ""],
               'li'     => [lambda { @parent == "ul" ? "* " : "1. " }, "\n"],
               'code'   => ["", ""],
               'h1'     => ["# ", " #\n"],
               'h2'     => ["## ", " ##\n"],
               'h3'     => ["### ", " ###\n"],
               'em'     => ["*", "*"],
               'strong' => ["**", "**"] }

  # Called when a tag opens: remembers the tag name and its attributes, then
  # prints the opening markdown for that tag, if it has one.
  def start_element(name, attributes = [])
    @name, @attr = name, attr_hash(attributes)
    print(local_value(MARKDOWN[name][0])) if MARKDOWN[name]
    track_parent
  end

  # Called when a tag closes: prints the closing markdown for that tag, if it
  # has one, and forgets the list once a <ul>/<ol> is closed.
  def end_element(name)
    print(local_value(MARKDOWN[name][1])) if MARKDOWN[name]
    leave_list if LIST_TAGS.include?(name)
  end

  # content of the markup
  def characters(string)
    case @name
    when "code"
      # 4 spaces to indicate code
      string.split("\n").each { |line| print "    #{line}\n" }
    when "img"
      # image doesn't contain any children, normally - print nothing
    when 'p', 'h1', 'h2', 'h3', 'li', 'ul', 'ol'
      print string.chomp
    else
      # getting rid of annoying newlines: a whitespace-only run that starts
      # with a newline collapses to a single newline. Real text is printed
      # untouched, even when it happens to start with a newline.
      # (The former `string[0] == 10` test only ever matched on Ruby 1.8,
      # where String#[] returned a byte value.)
      blank_run = string.start_with?("\n") && string.strip.empty?
      print(blank_run ? "\n" : string)
    end
  end

  # CDATA sections are passed through unchanged.
  def cdata_block(string)
    print string
  end

  private

  # way to handle the start and end of elements - local evaluation if the
  # value is a proc. instance_exec is used rather than instance_eval because
  # instance_eval yields the receiver as a block argument, which the
  # zero-argument lambdas in MARKDOWN reject on Ruby 1.9 and later.
  def local_value(value)
    value.is_a?(Proc) ? instance_exec(&value) : value
  end

  # take attribute array and make it a hash
  # Accepts both layouts a SAX parser may hand over:
  #   flat:  [key1, value1, key2, value2, ...]
  #   pairs: [[key1, value1], [key2, value2], ...]
  # A trailing key without a value maps to nil.
  def attr_hash(attributes)
    list = attributes || []
    pairs = list.first.is_a?(Array) ? list : list.each_slice(2)
    output = {}
    pairs.each do |key, value|
      output[key] = value
    end
    output
  end

  # for ul and ol, it's necessary to keep track of which is which.
  # Only list tags are recorded, so inline tags inside an item (em, a, ...)
  # no longer overwrite the list type; a stack keeps nested lists apart.
  def track_parent
    return unless LIST_TAGS.include?(@name)
    (@list_stack ||= []).push(@name)
    @parent = @name
  end

  # Drops the innermost open list and makes the enclosing list (if any) the
  # current parent again.
  def leave_list
    stack = (@list_stack ||= [])
    stack.pop
    @parent = stack.last
  end

end
|
data/nwodkram.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = 'nwodkram'
|
3
|
+
s.version = '0.2'
|
4
|
+
s.date = '2010-05-01'
|
5
|
+
|
6
|
+
s.description = 'Convert html into markdown'
|
7
|
+
s.summary = 'Convert html into markdown'
|
8
|
+
|
9
|
+
s.authors = ['Elise Huard']
|
10
|
+
s.email = 'nwodkram@elisehuard.be'
|
11
|
+
|
12
|
+
s.files = %w[
|
13
|
+
README.md
|
14
|
+
lib/nwodkram_parser.rb
|
15
|
+
lib/nwodkram.rb
|
16
|
+
Rakefile
|
17
|
+
nwodkram.gemspec
|
18
|
+
spec/spec_helper.rb
|
19
|
+
spec/spec.opts
|
20
|
+
spec/rcov.opts
|
21
|
+
spec/nwodkram_spec.rb
|
22
|
+
]
|
23
|
+
s.test_files = s.files.select {|path| path =~ /^spec\/.*_spec.rb/ }
|
24
|
+
s.add_dependency 'nokogiri', '>= 1.4.1'
|
25
|
+
s.require_paths = %w[lib]
|
26
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + "/spec_helper")
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
# Issue: BlueCloth surrounds every bit of html with a <p> tag
|
5
|
+
describe "converting html to markdown" do
|
6
|
+
|
7
|
+
it "should not convert simple text" do
|
8
|
+
markdown = "hello hello\n"
|
9
|
+
html = markdown.to_html
|
10
|
+
html.to_markdown.should == markdown
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should convert emphasis" do
|
14
|
+
markdown = "*NO* smoking\n"
|
15
|
+
html = markdown.to_html
|
16
|
+
html.to_markdown.should == markdown
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should convert strong" do
|
20
|
+
markdown = "I was **born** under a wandering star\n"
|
21
|
+
html = markdown.to_html
|
22
|
+
html.to_markdown.should == markdown
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should convert a link" do
|
26
|
+
markdown = "my favourite site [test](http://test.com) because it rules\n"
|
27
|
+
html = markdown.to_html
|
28
|
+
html.to_markdown.should == markdown
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should convert an image" do
|
32
|
+
markdown = "\n"
|
33
|
+
html = markdown.to_html
|
34
|
+
html.to_markdown.should == markdown
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should convert an unordered list" do
|
38
|
+
markdown = "* test\n* test2\n"
|
39
|
+
html = markdown.to_html
|
40
|
+
html.to_markdown.should == markdown
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should convert an ordered list" do
|
44
|
+
markdown = "1. test\n1. test2\n"
|
45
|
+
html = markdown.to_html
|
46
|
+
html.to_markdown.should == markdown
|
47
|
+
end
|
48
|
+
|
49
|
+
it "should convert a pre" do
|
50
|
+
markdown = " def test\n p today\n end\n"
|
51
|
+
html = markdown.to_html
|
52
|
+
html.to_markdown.should == markdown
|
53
|
+
end
|
54
|
+
|
55
|
+
it "should convert a h1 title" do
|
56
|
+
markdown = "# title #\n"
|
57
|
+
html = markdown.to_html
|
58
|
+
html.to_markdown.should == markdown
|
59
|
+
end
|
60
|
+
|
61
|
+
it "should convert a h3 title" do
|
62
|
+
markdown = "### title ###\n"
|
63
|
+
html = markdown.to_html
|
64
|
+
html.to_markdown.should == markdown
|
65
|
+
end
|
66
|
+
|
67
|
+
# the ultimate test: convert the readme
|
68
|
+
it "should convert a whole text" do
|
69
|
+
markdown = File.open(File.expand_path(File.dirname(__FILE__) + "/../README.md")).read
|
70
|
+
html = markdown.to_html
|
71
|
+
html.to_markdown.should == markdown
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
75
|
+
|
data/spec/rcov.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
-x gems,spec
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
dir = File.dirname(__FILE__)
|
2
|
+
$LOAD_PATH.unshift "#{dir}/../lib"
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
require 'spec'
|
6
|
+
require 'nwodkram'
|
7
|
+
require 'bluecloth' # markdown converter to test against
|
8
|
+
require 'rspec/matchers'
|
9
|
+
|
10
|
+
Spec::Runner.configure do |config|
|
11
|
+
end
|
12
|
+
|
13
|
+
# Spec-only helper: reopens String so a markdown string can be rendered to
# HTML, giving the specs something to convert back.
class String
  # Renders this string (taken as markdown) to HTML with BlueCloth.
  def to_html
    BlueCloth.new(self).to_html
  end
end
|
19
|
+
|
20
|
+
Rspec::Matchers.define :convert_to_html_and_back do
|
21
|
+
match do |markdown|
|
22
|
+
html = markdown.to_html
|
23
|
+
html.to_markdown == markdown
|
24
|
+
end
|
25
|
+
end
|
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nwodkram
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 2
|
8
|
+
version: "0.2"
|
9
|
+
platform: ruby
|
10
|
+
authors:
|
11
|
+
- Elise Huard
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain: []
|
15
|
+
|
16
|
+
date: 2010-05-01 00:00:00 +02:00
|
17
|
+
default_executable:
|
18
|
+
dependencies:
|
19
|
+
- !ruby/object:Gem::Dependency
|
20
|
+
name: nokogiri
|
21
|
+
prerelease: false
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
segments:
|
27
|
+
- 1
|
28
|
+
- 4
|
29
|
+
- 1
|
30
|
+
version: 1.4.1
|
31
|
+
type: :runtime
|
32
|
+
version_requirements: *id001
|
33
|
+
description: Convert html into markdown
|
34
|
+
email: nwodkram@elisehuard.be
|
35
|
+
executables: []
|
36
|
+
|
37
|
+
extensions: []
|
38
|
+
|
39
|
+
extra_rdoc_files: []
|
40
|
+
|
41
|
+
files:
|
42
|
+
- README.md
|
43
|
+
- lib/nwodkram_parser.rb
|
44
|
+
- lib/nwodkram.rb
|
45
|
+
- Rakefile
|
46
|
+
- nwodkram.gemspec
|
47
|
+
- spec/spec_helper.rb
|
48
|
+
- spec/spec.opts
|
49
|
+
- spec/rcov.opts
|
50
|
+
- spec/nwodkram_spec.rb
|
51
|
+
has_rdoc: true
|
52
|
+
homepage:
|
53
|
+
licenses: []
|
54
|
+
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
segments:
|
65
|
+
- 0
|
66
|
+
version: "0"
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
requirements: []
|
75
|
+
|
76
|
+
rubyforge_project:
|
77
|
+
rubygems_version: 1.3.6
|
78
|
+
signing_key:
|
79
|
+
specification_version: 3
|
80
|
+
summary: Convert html into markdown
|
81
|
+
test_files:
|
82
|
+
- spec/nwodkram_spec.rb
|