nwodkram 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,28 @@
1
+ # Nwodkram #
2
+
3
+ nwodkram (markdown in reverse) is a tool to reverse html into markdown.
4
+
5
+ ## Usage ##
6
+
7
+ The gem reopens class String and adds a to_markdown method.
8
+ When valid html is given, it returns markdown.
9
+ html = File.open('page.html', 'r') {|f| f.read }
10
+ markdown = html.to_markdown
11
+
12
+ ## Use case ##
13
+
14
+ My use case was to migrate a blog from wordpress.com to something more civilised (and self-hosted).
15
+
16
+ ## Supports ##
17
+
18
+ *tags*:
19
+ p, a, img, ul, ol, li, h1, h2, h3, em, strong
20
+
21
+ *rubies*:
22
+ tested with MRI ruby 1.8.7 and 1.9.2 (preview1)
23
+
24
+ ## TODO ##
25
+
26
+ The tool doesn't support the [full markdown syntax](http://daringfireball.net/projects/markdown/syntax).
27
+
28
+ If this tool gets used, I could extend it. For now, it's sufficient for my limited purposes.
data/Rakefile ADDED
@@ -0,0 +1,28 @@
1
+ require "rubygems"
2
+
3
+
4
task :default => :spec

begin
  require 'spec/rake/spectask'
rescue LoadError
  # The spec rake task could not be loaded: keep a :spec task around that only
  # tells the user what to install.
  desc "Run specs"
  task :spec do
    $stderr.puts '`gem install rspec` to run specs'
  end
else
  project_root = File.dirname(__FILE__)

  # Builds the runner options; called once per task so the tasks never share
  # one array instance.
  runner_options = lambda { ['--options', "\"#{project_root}/spec/spec.opts\""] }

  desc "Run specs"
  Spec::Rake::SpecTask.new do |spec_task|
    spec_task.spec_opts  = runner_options.call
    spec_task.spec_files = FileList['spec/*_spec.rb']
  end

  desc "Run all specs in spec directory with RCov"
  Spec::Rake::SpecTask.new(:rcov) do |rcov_task|
    rcov_task.spec_opts  = runner_options.call
    rcov_task.spec_files = FileList['spec/*_spec.rb']
    rcov_task.rcov       = true
    # Wrapped in a lambda, so spec/rcov.opts is only read when the lambda is
    # called; every whitespace-separated token becomes one option.
    rcov_task.rcov_opts  = lambda do
      IO.readlines(project_root + "/spec/rcov.opts").map { |line| line.chomp.split " " }.flatten
    end
  end
end
data/lib/nwodkram.rb ADDED
@@ -0,0 +1,27 @@
1
require 'stringio'

require 'nokogiri'
require 'nwodkram_parser'
3
# Converts an HTML string to Markdown.
#
# The SAX handler (NwodkramParser) emits its Markdown with Kernel#print, so
# the conversion temporarily points $stdout at an in-memory buffer and returns
# what was printed.
class Nwodkram

  # The HTML source that #to_markdown converts.
  attr_accessor :html

  # html - String holding the HTML to convert.
  def initialize(html)
    @html = html
  end

  # Parses @html and returns the Markdown the handler printed, as a String.
  #
  # $stdout is a process-wide global: while this method runs, anything else
  # printing to $stdout ends up in the result as well.
  # Exceptions raised by the parser propagate to the caller.
  def to_markdown
    parser = Nokogiri::HTML::SAX::Parser.new(NwodkramParser.new)
    # Remember whatever stream is currently installed: the caller may already
    # have redirected $stdout, and resetting it to STDOUT would discard that.
    previous_stdout = $stdout
    buffer = StringIO.new
    begin
      $stdout = buffer
      parser.parse(@html)
    ensure
      $stdout = previous_stdout
    end
    buffer.string
  end

end
22
+
23
# Core extension: lets any String holding HTML be converted directly.
class String
  # Returns the result of Nwodkram#to_markdown for the receiver.
  def to_markdown
    converter = Nwodkram.new(self)
    converter.to_markdown
  end
end
@@ -0,0 +1,69 @@
1
# SAX handler that prints Markdown (via Kernel#print) while an HTML document
# is being parsed. Output goes to whatever $stdout currently is.
#
# Known limitation kept from the original design: #characters decides how to
# print text from @name, which is the most recently *opened* element, not
# necessarily the element that directly contains the text.
class NwodkramParser < Nokogiri::XML::SAX::Document

  # attr - Hash of the attributes of the element being opened or closed.
  # name - name of the most recently opened element.
  attr_accessor :attr, :name

  # Elements whose type decides the bullet style of the <li> items they hold.
  LIST_TAGS = %w[ul ol].freeze

  # tag name => [text printed when the element opens, text printed when it closes].
  # An entry may be a lambda instead of a String; it is then run in the context
  # of the parser instance (see #local_value) so it can read @attr and @parent.
  MARKDOWN = {
    'p'      => ["", "\n"],
    'a'      => ["[", lambda { "](#{@attr['href']})" }],
    'img'    => [lambda { "![#{@attr['title'] || @attr['alt']}](#{@attr['src']})" }, ""],
    'li'     => [lambda { @parent == "ul" ? "* " : "1. " }, "\n"],
    'code'   => ["", ""],
    'h1'     => ["# ", " #\n"],
    'h2'     => ["## ", " ##\n"],
    'h3'     => ["### ", " ###\n"],
    'em'     => ["*", "*"],
    'strong' => ["**", "**"]
  }.freeze

  # SAX callback for an opening tag.
  #
  # name       - tag name.
  # attributes - attribute list as handed over by Nokogiri (see #attr_hash).
  def start_element(name, attributes = [])
    @name, @attr = name, attr_hash(attributes)
    # Remember the attributes so the matching end_element can see them again
    # even when child elements were opened in between.
    open_attributes.push(@attr)
    markup = MARKDOWN[name]
    print(local_value(markup[0])) if markup
    track_parent
  end

  # SAX callback for a closing tag.
  #
  # name - tag name.
  def end_element(name)
    # Closing markup such as "](href)" needs the attributes of the element
    # being closed, not those of the last child that was opened inside it.
    closing_attributes = open_attributes.pop
    @attr = closing_attributes if closing_attributes
    markup = MARKDOWN[name]
    print(local_value(markup[1])) if markup
    leave_list if LIST_TAGS.include?(name)
  end

  # SAX callback for text content; how it is printed depends on @name.
  def characters(string)
    case @name
    when "code"
      # Markdown code blocks are marked by four leading spaces on every line.
      string.split("\n").each { |line| print "    #{line}\n" }
    when "img"
      # an image normally has no text children: print nothing
    when 'p', 'h1', 'h2', 'h3', 'li', 'ul', 'ol'
      print string.chomp
    else
      # Getting rid of annoying newlines: a leading run of newlines collapses
      # into a single one, the rest of the text is kept. (The former check
      # `string[0] == 10` only matched on Ruby 1.8, where String#[] returned
      # a character code.)
      print string.sub(/\A\n+/, "\n")
    end
  end

  # SAX callback for CDATA sections: printed verbatim.
  def cdata_block(string)
    print string
  end

  private

  # Resolves one MARKDOWN entry: Strings are returned as they are, lambdas are
  # run with self set to this parser.
  # instance_exec is used because instance_eval yields the receiver as a block
  # argument, which a zero-argument lambda rejects with ArgumentError.
  def local_value(value)
    value.is_a?(Proc) ? instance_exec(&value) : value
  end

  # Turns the SAX attribute list into a Hash.
  #
  # Accepts both the flat form [key1, value1, key2, value2, ...] and the
  # nested form [[key1, value1], [key2, value2], ...]; flattening first
  # reduces the nested form to the flat one. A trailing key without a value
  # maps to nil.
  def attr_hash(attributes)
    output = {}
    attributes.flatten.each_slice(2) { |key, value| output[key] = value }
    output
  end

  # For ul and ol, it's necessary to keep track of which is which: only list
  # containers change @parent, so inline tags inside an item (em, a, ...)
  # cannot turn the following bullets into numbered items.
  def track_parent
    return unless LIST_TAGS.include?(@name)
    list_stack.push(@name)
    @parent = @name
  end

  # Called when a ul/ol closes: falls back to the enclosing list, if any.
  def leave_list
    list_stack.pop
    @parent = list_stack.last
  end

  # Attributes of the elements that are currently open, innermost last.
  # Created lazily because this class defines no initializer.
  def open_attributes
    @open_attributes ||= []
  end

  # Names of the lists (ul/ol) that are currently open, innermost last.
  def list_stack
    @list_stack ||= []
  end

end
data/nwodkram.gemspec ADDED
@@ -0,0 +1,26 @@
1
# Gem specification for nwodkram (HTML to Markdown converter).
Gem::Specification.new do |s|
  s.name    = 'nwodkram'
  s.version = '0.2'
  s.date    = '2010-05-01'

  s.description = 'Convert html into markdown'
  s.summary     = 'Convert html into markdown'

  s.authors = ['Elise Huard']
  s.email   = 'nwodkram@elisehuard.be'

  s.files = %w[
    README.md
    lib/nwodkram_parser.rb
    lib/nwodkram.rb
    Rakefile
    nwodkram.gemspec
    spec/spec_helper.rb
    spec/spec.opts
    spec/rcov.opts
    spec/nwodkram_spec.rb
  ]
  # Test files are the packaged files under spec/ whose name ends in
  # "_spec.rb". The pattern is anchored to the whole path (\A ... \z) and the
  # dot is escaped, so only real *_spec.rb files qualify.
  s.test_files = s.files.grep(%r{\Aspec/.*_spec\.rb\z})
  s.add_dependency 'nokogiri', '>= 1.4.1'
  s.require_paths = %w[lib]
end
@@ -0,0 +1,75 @@
1
+ require File.expand_path(File.dirname(__FILE__) + "/spec_helper")
2
+ require 'stringio'
3
+
4
# Round-trip specs: every Markdown sample is rendered to HTML with
# String#to_html and must come back unchanged from String#to_markdown.
#
# Issue: BlueCloth surrounds every bit of html with a wrapping element
# (NOTE(review): the original note was cut off at this point; presumably it
# referred to a <p> element - confirm).
describe "converting html to markdown" do

  # [example description, markdown sample] pairs, one example each.
  [
    ["should not convert simple text",   "hello hello\n"],
    ["should convert emphasis",          "*NO* smoking\n"],
    ["should convert strong",            "I was **born** under a wandering star\n"],
    ["should convert a link",            "my favourite site [test](http://test.com) because it rules\n"],
    ["should convert an image",          "![title](http://test.com/lalala.jpg)\n"],
    ["should convert an unordered list", "* test\n* test2\n"],
    ["should convert an ordered list",   "1. test\n1. test2\n"],
    # A Markdown code block needs four leading spaces on every line.
    ["should convert a pre",             "    def test\n      p today\n    end\n"],
    ["should convert a h1 title",        "# title #\n"],
    ["should convert a h3 title",        "### title ###\n"]
  ].each do |description, markdown|
    it description do
      markdown.to_html.to_markdown.should == markdown
    end
  end

  # the ultimate test: convert the readme
  it "should convert a whole text" do
    # File.read closes the file again, unlike File.open(...).read
    markdown = File.read(File.expand_path(File.dirname(__FILE__) + "/../README.md"))
    markdown.to_html.to_markdown.should == markdown
  end

end
75
+
data/spec/rcov.opts ADDED
@@ -0,0 +1 @@
1
+ -x gems,spec
data/spec/spec.opts ADDED
@@ -0,0 +1,2 @@
1
+ --diff
2
+ --color
@@ -0,0 +1,25 @@
1
# Put the gem's lib/ directory in front of the load path so the specs run
# against the code in this checkout.
$LOAD_PATH.unshift "#{File.dirname(__FILE__)}/../lib"

require 'rubygems'
require 'spec'
require 'nwodkram'
require 'bluecloth' # markdown converter to test against
require 'rspec/matchers'

# No runner options are set.
Spec::Runner.configure { |config| }

class String
  # Renders the receiver with BlueCloth and returns the resulting HTML.
  def to_html
    BlueCloth.new(self).to_html
  end
end

# Matcher that passes when converting the text to HTML and back to Markdown
# yields the text it started from.
Rspec::Matchers.define :convert_to_html_and_back do
  match do |original|
    original.to_html.to_markdown == original
  end
end
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: nwodkram
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 2
8
+ version: "0.2"
9
+ platform: ruby
10
+ authors:
11
+ - Elise Huard
12
+ autorequire:
13
+ bindir: bin
14
+ cert_chain: []
15
+
16
+ date: 2010-05-01 00:00:00 +02:00
17
+ default_executable:
18
+ dependencies:
19
+ - !ruby/object:Gem::Dependency
20
+ name: nokogiri
21
+ prerelease: false
22
+ requirement: &id001 !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ segments:
27
+ - 1
28
+ - 4
29
+ - 1
30
+ version: 1.4.1
31
+ type: :runtime
32
+ version_requirements: *id001
33
+ description: Convert html into markdown
34
+ email: nwodkram@elisehuard.be
35
+ executables: []
36
+
37
+ extensions: []
38
+
39
+ extra_rdoc_files: []
40
+
41
+ files:
42
+ - README.md
43
+ - lib/nwodkram_parser.rb
44
+ - lib/nwodkram.rb
45
+ - Rakefile
46
+ - nwodkram.gemspec
47
+ - spec/spec_helper.rb
48
+ - spec/spec.opts
49
+ - spec/rcov.opts
50
+ - spec/nwodkram_spec.rb
51
+ has_rdoc: true
52
+ homepage:
53
+ licenses: []
54
+
55
+ post_install_message:
56
+ rdoc_options: []
57
+
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ segments:
65
+ - 0
66
+ version: "0"
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ requirements: []
75
+
76
+ rubyforge_project:
77
+ rubygems_version: 1.3.6
78
+ signing_key:
79
+ specification_version: 3
80
+ summary: Convert html into markdown
81
+ test_files:
82
+ - spec/nwodkram_spec.rb