remark 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +18 -0
- data/README.markdown +29 -0
- data/Rakefile +31 -0
- data/bin/remark +22 -0
- data/lib/remark.rb +61 -0
- data/lib/remark/core_ext.rb +25 -0
- data/lib/remark/hpricot_ext.rb +212 -0
- data/spec/hpricot_ext_spec.rb +138 -0
- data/spec/remark_spec.rb +157 -0
- data/spec/sample.html +53 -0
- metadata +83 -0
data/LICENSE
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
Copyright (c) 2009 Mislav Marohnić
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
4
|
+
this software and associated documentation files (the "Software"), to deal in
|
5
|
+
the Software without restriction, including without limitation the rights to
|
6
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
7
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
8
|
+
subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
15
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
16
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
17
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
18
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
Remark — HTML→Markdown tool
|
2
|
+
===========================
|
3
|
+
|
4
|
+
<i>Remark</i> parses HTML and delivers proper Markdown.
|
5
|
+
|
6
|
+
Usage
|
7
|
+
-----
|
8
|
+
|
9
|
+
From command-line:
|
10
|
+
|
11
|
+
remark path/to/file.html
|
12
|
+
|
13
|
+
or by STDIN:
|
14
|
+
|
15
|
+
echo "..." | remark
|
16
|
+
|
17
|
+
You can try feeding it a document from the web:
|
18
|
+
|
19
|
+
curl -s daringfireball.net/projects/markdown/basics | remark > result.markdown
|
20
|
+
|
21
|
+
See how it does.
|
22
|
+
|
23
|
+
If you've cloned the repository, invoke the binary like this:
|
24
|
+
|
25
|
+
ruby -Ilib -rubygems bin/remark spec/sample.html
|
26
|
+
|
27
|
+
And this is how you use it from Ruby code:
|
28
|
+
|
29
|
+
Remark.new('<h1>My document</h1><p>Some content</p>').to_markdown
|
data/Rakefile
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Renders the bundled sample document with the working-copy library.
desc "renders the spec/sample.html to Markdown"
task :sample do
  system %(ruby -Ilib -rubygems bin/remark spec/sample.html)
end

# Builds remark.gemspec from the specification declared below.
desc "generates .gemspec file"
task :gemspec do
  spec = Gem::Specification.new do |gem|
    gem.name = "remark"
    # keep in sync with the released package version
    gem.version = '0.3.1'

    gem.summary = "HTML to Markdown converter"
    gem.description = "Remark turns simple HTML documents or content in web pages to Markdown source."
    gem.email = "mislav.marohnic@gmail.com"
    gem.homepage = "http://github.com/mislav/remark"
    gem.authors = ["Mislav Marohnić"]
    gem.has_rdoc = false

    # the library requires hpricot at load time; the specs are written for RSpec 1.x
    gem.add_dependency 'hpricot', '~> 0.8.2'
    gem.add_development_dependency 'rspec', '~> 1.2.9'

    # only ship files that are both matched by the globs and tracked by git
    gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files`.split("\n")
    gem.executables = Dir['bin/*'].map { |f| File.basename(f) }
  end

  spec_string = spec.to_ruby

  begin
    # NOTE(review): this evals the generated gemspec source under $SAFE = 3 in a
    # separate thread as a sanity check before writing it. The source comes from
    # the specification built above, not from external input.
    Thread.new { eval("$SAFE = 3\n#{spec_string}", binding) }.join
  rescue
    abort "unsafe gemspec: #{$!}"
  else
    File.open("#{spec.name}.gemspec", 'w') { |file| file.write spec_string }
  end
end
|
data/bin/remark
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line front end for Remark: reads HTML from the files named on the
# command line (or from STDIN when none are given) and prints Markdown.
require 'optparse'
require 'remark'

# options handed to Remark.new; only switches the user supplied are set
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: remark [options] [FILE]"

  opts.on("-n", "--inline-links", "Render link URLs inline (instead of reference-style)") do |inline|
    options[:reference_links] = !inline
  end

  opts.on("-s", "--scope EXPR", "Scope to a specific CSS/XPath expression in the HTML document") do |scope|
    options[:scope] = scope
  end

  # may be given several times; every expression is collected
  opts.on("-i", "--ignore EXPR", "Ignore elements that match CSS/XPath expression") do |expr|
    (options[:ignores] ||= []) << expr
  end
end.parse!

# parse! removed the switches from ARGV, so ARGF now sees only the file names
puts Remark.new(ARGF.read, options).to_markdown
|
data/lib/remark.rb
ADDED
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'remark/hpricot_ext'
|
2
|
+
|
3
|
+
# Converts an HTML document or fragment to Markdown source.
#
#   Remark.new('<h1>My document</h1><p>Some content</p>').to_markdown
#
# Options read by this class:
# * <tt>:reference_links</tt> - when true (the default) link URLs are listed
#   at the end of the output as numbered references; when false they are
#   rendered inline.
# * <tt>:scope</tt> - CSS/XPath expression selecting the element to convert.
# * <tt>:ignores</tt> - array of CSS/XPath expressions; matching elements
#   inside the scope are left out of the output.
class Remark
  DEFAULT_OPTIONS = { :reference_links => true }

  # source  - HTML string, parsed with Hpricot.
  # options - hash merged over DEFAULT_OPTIONS.
  def initialize(source, options = {})
    @doc = Hpricot(source)
    @options = DEFAULT_OPTIONS.merge options
    @ignored_elements = nil
  end

  # Renders the scoped part of the document as a Markdown string.
  #
  # With reference-style links enabled, the collected link definitions are
  # appended after the content, separated from it by two blank lines.
  def to_markdown
    parent = scope
    collect_ignored_elements(parent)
    return parent.to_markdown(@options) if inline_links?

    # the elements append [href, title] pairs to this array while rendering
    links = @options[:links] = []
    result = parent.to_markdown(@options)
    links.empty? ? result : result + "\n\n\n" + output_reference_links(links)
  end

  # Returns the node that gets converted:
  # 1. the first match of the :scope option, when given;
  # 2. otherwise, for documents with /html/body, the element that is the
  #    direct parent of the largest number of paragraphs (the body itself
  #    when the document has no paragraphs at all);
  # 3. otherwise the whole parsed document.
  #
  # Raises ArgumentError when :scope is given but matches nothing.
  def scope
    if (selector = @options[:scope])
      @doc.at(selector) || raise(ArgumentError, "scope #{selector.inspect} does not match any element")
    elsif (body = @doc.at('/html/body'))
      paragraph_counts = Hash.new(0)
      (body / 'p').each { |para| paragraph_counts[para.parent] += 1 }

      # count => parent; when several parents tie, invert keeps one of them
      by_count = paragraph_counts.invert
      # a body without paragraphs yields no candidates; fall back to the body
      by_count[by_count.keys.max] || body
    else
      @doc
    end
  end

  # True when link URLs are rendered inline rather than as references.
  def inline_links?
    !@options[:reference_links]
  end

  # Formats the collected links as Markdown reference definitions, one per
  # line and numbered from 1:
  #
  #   [1]: http://example.com "Optional title"
  #
  # links - array of [href, title] pairs; title may be nil.
  def output_reference_links(links)
    references = []
    links.each_with_index do |(href, title), i|
      # quote the title the same way inline links do; String#inspect would
      # inject Ruby escape sequences that Markdown does not understand
      title_markup = title ? %( "#{title}") : ''
      references << "[#{i + 1}]: #{href}#{title_markup}"
    end
    references.join("\n")
  end

  private

  # True when +elem+ must be left out of the output.
  # NOTE(review): IGNORE is not defined in this class; presumably it is the
  # tag list defined by remark/hpricot_ext -- confirm.
  def ignore_element?(elem)
    IGNORE.include?(elem.name) || (@ignored_elements && @ignored_elements.include?(elem))
  end

  # Resolves every :ignores expression against +scope+ and stores the matching
  # elements under @options[:ignored_elements] (and in @ignored_elements, which
  # ignore_element? reads). Does nothing when no :ignores option was given.
  def collect_ignored_elements(scope)
    if @options[:ignores]
      @ignored_elements = @options[:ignored_elements] = @options[:ignores].map do |expr|
        scope.search(expr).to_a
      end.flatten.uniq
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Object.class_eval do
  # Default answer for every object: not blank.
  def blank?
    false
  end
end
|
4
|
+
|
5
|
+
NilClass.class_eval do
  # nil always counts as blank.
  def blank?
    true
  end
end
|
8
|
+
|
9
|
+
String.class_eval do
  # True when the string is empty or consists only of whitespace.
  def blank?
    # blank means "contains no non-whitespace character"
    self !~ /\S/
  end

  # Returns a copy in which newlines, tabs, carriage returns and form feeds
  # are turned into spaces and every run of spaces is collapsed to one.
  # Carriage returns are included so that input with CRLF line endings does
  # not leave stray "\r" characters in the result.
  def squeeze_whitespace
    self.tr("\n\t\r\f", ' ').squeeze(' ')
  end

  # Returns a copy with +with+ (four spaces by default) inserted at the
  # start of every line.
  def indent(with = ' ' * 4)
    self.gsub(/^/, with)
  end
end
|
22
|
+
|
23
|
+
Hpricot::Text.module_eval do
  # A text node is blank when its string form is blank.
  def blank?
    to_s.blank?
  end
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
require 'remark/core_ext'
|
3
|
+
|
4
|
+
# Fallback behavior for Hpricot nodes: a node produces no Markdown and is
# not a block unless a more specific definition says otherwise.
Hpricot::Node.module_eval do
  # No Markdown output by default.
  def to_markdown(options = {})
    nil
  end

  # Not a block-level node by default.
  def markdown_block?
    false
  end
end
|
9
|
+
|
10
|
+
# Text needs no special processing: it is emitted as-is, with whitespace
# collapsed.
Hpricot::Text.module_eval do
  def to_markdown(options = {})
    to_s.squeeze_whitespace
  end
end
|
14
|
+
|
15
|
+
# CDATA sections are handled like text: emitted with whitespace collapsed.
Hpricot::CData.module_eval do
  def to_markdown(options = {})
    to_s.squeeze_whitespace
  end
end
|
18
|
+
|
19
|
+
# Nodes that hold children.
Hpricot::Container.module_eval do
  # Converts every child to Markdown and joins the pieces into one string.
  #
  # Pieces belonging to adjacent blocks are separated by an empty line, and
  # whitespace is trimmed next to block boundaries and at the edges of a
  # block parent. The content of <pre> children is never trimmed.
  #
  # Returns '' when the node has no children.
  def to_markdown(options = {})
    return '' unless self.children

    inside_block = self.markdown_block?
    after_block = false
    pieces = []

    self.children.each do |child|
      child_is_block = child.markdown_block?
      text = child.to_markdown(options)

      # drop children that rendered to nil or '', and whitespace-only
      # output that directly follows a block
      skip = text.nil? || text.empty? || (after_block && text.blank?)

      unless skip
        # an empty line goes between two adjacent blocks
        if (!pieces.empty? && child_is_block) || after_block
          # the piece before the boundary loses its trailing whitespace;
          # a whitespace-only piece is removed altogether
          if pieces.last.blank?
            pieces.pop
          else
            pieces.last.rstrip!
          end
          # never start the output with the separator
          pieces << "\n\n" if pieces.any?
        end

        if 'pre' != child.name
          # leading whitespace goes when the piece follows a block, is the
          # first child of a block, or follows a space / forced line break
          if after_block ||
             (inside_block && child == self.children.first) ||
             (!pieces.empty? && pieces.last =~ / ( \n)?$/)
            text.lstrip!
          end

          # trailing whitespace goes when this is the last child of a block
          text.rstrip! if inside_block && self.children.last == child
        end

        pieces << text
      end

      after_block = child_is_block
    end

    pieces.join('')
  end
end
|
64
|
+
|
65
|
+
# Nodes that cannot hold children.
Hpricot::Leaf.module_eval do
  # Element leaves render as their inner text with whitespace collapsed;
  # every other kind of leaf renders as nil.
  def to_markdown(options = {})
    return nil unless elem?
    inner_text.squeeze_whitespace
  end
end
|
71
|
+
|
72
|
+
Hpricot::Elem.module_eval do
  # NOTE: constants assigned inside a module_eval *block* are created in the
  # scope surrounding the block (top level here), not inside Hpricot::Elem.
  # Remark#ignore_element? refers to IGNORE unqualified, so keep them here.

  # tags that never produce output
  IGNORE = %w(script head style)
  # recognized tags that are meaningful without content
  ALLOWED_EMPTY = %w(img br hr )
  MARKDOWN_BLOCK = %w(p blockquote h1 h2 h3 h4 h5 h6 pre hr)
  MARKDOWN_INLINE = %w(em strong code a img br)
  MARKDOWN_RECOGNIZED = MARKDOWN_BLOCK + MARKDOWN_INLINE + %w(div)
  # tags that count as blocks when deciding where empty lines go
  HTML_BLOCK = MARKDOWN_BLOCK + %w(ul ol dl div noscript form table address fieldset)

  # Renders the element as Markdown.
  #
  # Returns nil for ignored elements, '' for recognized elements without
  # content, and the element's own HTML when it has attributes Markdown
  # cannot express or when its tag has no Markdown form.
  def to_markdown(options = {})
    return nil if markdown_ignored?(options)
    return '' if markdown_empty?
    return to_s unless markdown_supported_attributes?

    # `super` renders the children (Container#to_markdown); it is called
    # inside the branches that need it so other tags never render children
    case name
    when 'div', 'noscript', 'p'
      super
    when /^h([1-6])$/
      ('#' * $1.to_i) + ' ' + super
    when 'ul', 'ol'
      remark_list(options)
    when 'li'
      content = super
      if (children || []).any? { |e| e.markdown_block? }
        # block content is indented so it lines up under the list marker
        content.indent
      else
        # whitespace at the edges of an inline item is insignificant; if it
        # were kept, remark_list would mistake it for an indented item
        content.strip
      end
    when 'pre'
      inner_text.rstrip.indent
    when 'em'
      "_#{super}_"
    when 'strong'
      "**#{super}**"
    when 'code'
      code = inner_text
      # double backticks allow a literal backtick inside the span
      code.index('`') ? "`` #{code} ``" : "`#{code}`"
    when 'a'
      remark_link(super, self['href'], self['title'], options)
    when 'img'
      # images are always rendered inline, never as references
      '!' + remark_link(self['alt'], self['src'], self['title'], :reference_links => false)
    when 'blockquote'
      super.indent('> ')
    when 'br'
      # Markdown's hard line break: two trailing spaces, then a newline
      "  \n" + inner_html
    else
      to_s
    end
  end

  # Renders a <ul> or <ol> as a Markdown list.
  #
  # Unordered items are marked with "*", ordered ones are numbered from 1.
  # Items are separated by an empty line as soon as one of them holds
  # indented (block) content, by a single newline otherwise. Items that
  # render to nil (ignored elements) are skipped and do not use up a number.
  def remark_list(options = {})
    unordered = self.name == 'ul'
    counter = 0
    nested = false
    items = []

    self.children_of_type('li').each do |li|
      text = li.to_markdown(options)
      next if text.nil?

      marker = unordered ? '*' : "#{counter += 1}."
      if text =~ /\A\s/
        # indented block content: the marker overwrites the start of the
        # indentation of the first line
        nested = true
        text[0, marker.length] = marker
        items << text
      else
        items << marker + ' ' + text
      end
    end

    items.join("\n" * (nested ? 2 : 1))
  end

  # True for tags listed in HTML_BLOCK.
  def markdown_block?
    HTML_BLOCK.include?(name)
  end

  # True for tags listed in MARKDOWN_RECOGNIZED.
  def markdown_recognized?
    MARKDOWN_RECOGNIZED.include?(name)
  end

  protected

  # True when the tag is in IGNORE or the element is listed in
  # options[:ignored_elements].
  def markdown_ignored?(options)
    IGNORE.include?(name) ||
      (options[:ignored_elements] && options[:ignored_elements].include?(self))
  end

  # True for a recognized element without content, unless its tag is one
  # of ALLOWED_EMPTY.
  def markdown_empty?
    empty? && markdown_recognized? && !ALLOWED_EMPTY.include?(name)
  end

  # True when the element's attributes can be expressed in Markdown:
  # * div: any attributes
  # * a:   exactly href, optionally title
  # * img: exactly alt and src, optionally title
  # * ol/ul: no attributes, and every child element is an attribute-less li
  # * anything else: no attributes at all
  def markdown_supported_attributes?
    case name
    when 'div'
      true
    when 'a'
      attribute_names_match?('href', 'title')
    when 'img'
      attribute_names_match?(%w(alt src), 'title')
    when 'ol', 'ul'
      attributes.empty? && (children || []).all? do |item|
        !item.elem? || (item.name == 'li' && item.attributes.empty?)
      end
    else
      attributes.empty?
    end
  end

  # True when the attribute names, leaving out the +optional+ ones, are
  # exactly +only+. Pass +only+ in sorted order: it is compared against the
  # sorted attribute names.
  def attribute_names_match?(only, optional = nil)
    names = attributes.keys.sort
    names -= Array(optional) if optional
    names == Array(only)
  end

  # Builds Markdown link markup.
  #
  # With options[:reference_links] set, returns "[text][n]" and records the
  # [href, title] pair in options[:links] (created when missing); a href
  # that was recorded before reuses its number. Otherwise returns
  # "[text](href)" or "[text](href "title")".
  def remark_link(text, href, title = nil, options = {})
    if options[:reference_links]
      links = (options[:links] ||= [])
      existing = links.find { |known_href, _| href == known_href }
      if existing
        num = links.index(existing) + 1
      else
        links << [href, title]
        num = links.length
      end
      "[#{text}][#{num}]"
    else
      title_markup = title ? %( "#{title}") : ''
      "[#{text}](#{href}#{title_markup})"
    end
  end
end
|
197
|
+
|
198
|
+
# Adds hash-like helpers to Hpricot::Attributes, but only where the class
# does not already provide them.
Hpricot::Attributes.class_eval do
  unless method_defined?(:empty?)
    # True when the element carries no attributes.
    def empty?
      to_hash.empty?
    end
  end

  unless method_defined?(:keys)
    # Names of the attributes.
    def keys
      to_hash.keys
    end
  end
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'remark/hpricot_ext'
|
2
|
+
|
3
|
+
describe Hpricot, "remark extensions" do
|
4
|
+
before(:all) do
|
5
|
+
@doc = Hpricot(<<-HTML.strip)
|
6
|
+
<?xml version="moo" ?>
|
7
|
+
<!DOCTYPE html>
|
8
|
+
<html>
|
9
|
+
<head>
|
10
|
+
<title>Sample document</title>
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
<h1>Sample <strong>Remark</strong> document</h1>
|
14
|
+
<p>
|
15
|
+
A paragraph with <em>nested</em> <strong>content</strong>
|
16
|
+
and <i>Remark</i>-supported elements.
|
17
|
+
</p>
|
18
|
+
|
19
|
+
<a name="content"> </a>
|
20
|
+
<h2>The content</h2>
|
21
|
+
<div id="content">
|
22
|
+
<p>First</p>
|
23
|
+
<p>Second</p>
|
24
|
+
Some content
|
25
|
+
<em>in-between</em>
|
26
|
+
<p>Third</p>
|
27
|
+
</div>
|
28
|
+
<p class="foo">I has classname</p>
|
29
|
+
|
30
|
+
<div id="empty"></div>
|
31
|
+
<blockquote>
|
32
|
+
Some famous quote
|
33
|
+
<blockquote>Nested famous quote</blockquote>
|
34
|
+
</blockquote>
|
35
|
+
<div class="code">
|
36
|
+
<p>Sample code:</p>
|
37
|
+
<pre>def preformatted
|
38
|
+
text
|
39
|
+
end
|
40
|
+
</pre>
|
41
|
+
</div>
|
42
|
+
<img src='moo.jpg' alt='cow'>
|
43
|
+
<img src='moo.jpg' alt='cow' width='16'>
|
44
|
+
|
45
|
+
<code>simple</code> <code>comp ` lex</code> <code><tag></code>
|
46
|
+
|
47
|
+
<div id="br">
|
48
|
+
<p>Foo<br>bar</p>
|
49
|
+
<p>Foo<br>
|
50
|
+
bar <code>baz</code></p>
|
51
|
+
<p>Foo</p><br><br><p>Bar</p><br>
|
52
|
+
</div>
|
53
|
+
|
54
|
+
<ul>
|
55
|
+
<li>First</li>
|
56
|
+
<li>Second</li>
|
57
|
+
</ul>
|
58
|
+
<ol>
|
59
|
+
<li>First</li>
|
60
|
+
<li>Second</li>
|
61
|
+
</ol>
|
62
|
+
</body>
|
63
|
+
</html>
|
64
|
+
HTML
|
65
|
+
end
|
66
|
+
|
67
|
+
def remark(elem, options = {})
|
68
|
+
(String === elem ? @doc.at(elem) : elem).to_markdown(options)
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should return empty string for empty document" do
|
72
|
+
remark(Hpricot('')).should == ''
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should ignore DOCTYPE, HEAD and XML processing instructions" do
|
76
|
+
remark('head').should be_nil
|
77
|
+
remark(@doc.children[0]).should be_nil # xmldecl
|
78
|
+
remark(@doc.children[2]).should be_nil # doctype
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should have whitespace nodes respond to blank" do
  # the anchor holds a single space; without an expectation this example
  # could never fail
  @doc.at('a[@name]').children.first.should be_blank
end
|
84
|
+
|
85
|
+
it "should support headings" do
|
86
|
+
remark('h1').should == "# Sample **Remark** document"
|
87
|
+
remark('h2').should == "## The content"
|
88
|
+
end
|
89
|
+
|
90
|
+
it "should support paragraphs" do
|
91
|
+
remark('p').should == "A paragraph with _nested_ **content** and <i>Remark</i>-supported elements."
|
92
|
+
end
|
93
|
+
|
94
|
+
it "should split paragraphs with an empty line" do
|
95
|
+
remark('#content').should == "First\n\nSecond\n\nSome content _in-between_\n\nThird"
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should keep full HTML for paragraphs if they have attributes" do
|
99
|
+
remark('p.foo').should == '<p class="foo">I has classname</p>'
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should not break on empty DIV" do
|
103
|
+
remark('#empty').should == ""
|
104
|
+
end
|
105
|
+
|
106
|
+
it "should support blockquotes" do
|
107
|
+
remark('blockquote > blockquote').should == "> Nested famous quote"
|
108
|
+
remark('blockquote').should == "> Some famous quote\n> \n> > Nested famous quote"
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should support preformatted text" do
|
112
|
+
remark('div.code').should == "Sample code:\n\n def preformatted\n text\n end"
|
113
|
+
end
|
114
|
+
|
115
|
+
it "should support image tags" do
|
116
|
+
remark('img[@alt]').should == '![cow](moo.jpg)'
|
117
|
+
remark('img[@width]').should == '<img src="moo.jpg" alt="cow" width="16" />'
|
118
|
+
end
|
119
|
+
|
120
|
+
it "should support code spans" do
|
121
|
+
remark('code').should == "`simple`"
|
122
|
+
remark('code ~ code').should == "`` comp ` lex ``"
|
123
|
+
remark('code ~ code ~ code').should == "`<tag>`"
|
124
|
+
end
|
125
|
+
|
126
|
+
it "should support BR" do
|
127
|
+
remark('#br').should == "Foo \nbar\n\nFoo \nbar `baz`\n\nFoo\n\nBar"
|
128
|
+
end
|
129
|
+
|
130
|
+
it "should support unordered list" do
|
131
|
+
remark('ul').should == "* First\n* Second"
|
132
|
+
end
|
133
|
+
|
134
|
+
it "should support ordered list" do
|
135
|
+
remark('ol').should == "1. First\n2. Second"
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
data/spec/remark_spec.rb
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
require 'remark'
|
2
|
+
|
3
|
+
describe Remark do
|
4
|
+
def remark(source, options = {})
|
5
|
+
options = {:reference_links => false}.merge(options)
|
6
|
+
described_class.new(source, options).to_markdown
|
7
|
+
end
|
8
|
+
|
9
|
+
it "should let through text content" do
|
10
|
+
remark("Foo bar").should == 'Foo bar'
|
11
|
+
remark("Foo bar\nbaz").should == 'Foo bar baz'
|
12
|
+
end
|
13
|
+
|
14
|
+
it "should preserve elements in remarked blocks" do
|
15
|
+
remark("<p>Foo <ins>bar</ins></p>").should == 'Foo <ins>bar</ins>'
|
16
|
+
remark("<h2>Foo <ins>bar</ins></h2>").should == '## Foo <ins>bar</ins>'
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should unescape HTML entities" do
|
20
|
+
remark("Foo&bar").should == 'Foo&bar'
|
21
|
+
remark("<p>If you’re doing all your development on the “master” branch, you’re not using git").should == "If you’re doing all your development on the “master” branch, you’re not using git"
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should leave unknown elements intact" do
|
25
|
+
remark(<<-HTML).should == "Foo\n\n<table>data</table>\n\nBar"
|
26
|
+
<p>Foo</p>
|
27
|
+
<table>data</table>
|
28
|
+
<p>Bar</p>
|
29
|
+
HTML
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "whitespace" do
|
33
|
+
it "should strip excess whitespace" do
|
34
|
+
remark(<<-HTML).should == "Foo bar"
|
35
|
+
<p>
|
36
|
+
Foo
|
37
|
+
bar
|
38
|
+
</p>
|
39
|
+
HTML
|
40
|
+
end
|
41
|
+
|
42
|
+
it "should strip whitespace in text nodes between processed nodes" do
|
43
|
+
remark(<<-HTML).should == "Foo\n\nbar\n\nBaz"
|
44
|
+
<p>Foo</p>
|
45
|
+
|
46
|
+
bar
|
47
|
+
<p>Baz</p>
|
48
|
+
HTML
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "lists" do
|
53
|
+
it "should support lists" do
|
54
|
+
remark(<<-HTML).should == "* foo\n* bar"
|
55
|
+
<ul>
|
56
|
+
<li>foo</li>
|
57
|
+
<li>bar</li>
|
58
|
+
</ul>
|
59
|
+
HTML
|
60
|
+
|
61
|
+
remark(<<-HTML).should == "1. foo\n2. bar"
|
62
|
+
<ol>
|
63
|
+
<li>foo</li>
|
64
|
+
<li>bar</li>
|
65
|
+
</ol>
|
66
|
+
HTML
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should support lists with nested content" do
|
70
|
+
remark(<<-HTML).should == "* foo\n \n bar\n\n* baz"
|
71
|
+
<ul>
|
72
|
+
<li><p>foo</p><p>bar</p></li>
|
73
|
+
<li><p>baz</p></li>
|
74
|
+
</ul>
|
75
|
+
HTML
|
76
|
+
end
|
77
|
+
|
78
|
+
it "should output malformed lists as HTML" do
|
79
|
+
remark(<<-HTML).should == "<ul>\n <span>bar</span>\n </ul>"
|
80
|
+
<ul>
|
81
|
+
<span>bar</span>
|
82
|
+
</ul>
|
83
|
+
HTML
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
it "should support preformatted blocks" do
|
88
|
+
remark("<pre>def foo\n bar\nend</pre>").should == " def foo\n bar\n end"
|
89
|
+
remark("<pre><code>def foo\n <bar>\nend</code></pre>").should == " def foo\n <bar>\n end"
|
90
|
+
remark("<pre>def foo\n</pre>").should == " def foo"
|
91
|
+
end
|
92
|
+
|
93
|
+
describe "inline" do
|
94
|
+
it "should remark inline elements" do
|
95
|
+
remark("<p>I'm so <strong>strong</strong></p>").should == "I'm so **strong**"
|
96
|
+
remark("<p>I'm so <em>emo</em></p>").should == "I'm so _emo_"
|
97
|
+
remark("<ul><li><em>Inline</em> stuff in <strong>lists</strong></li></ul>").should == "* _Inline_ stuff in **lists**"
|
98
|
+
remark("<h1>Headings <em>too</em></h1>").should == '# Headings _too_'
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should handle nested inline elements" do
|
102
|
+
remark("<p>I <strong>love <code>code</code></strong></p>").should == "I **love `code`**"
|
103
|
+
remark("<p>I <a href='#'>am <em>fine</em></a></p>").should == "I [am _fine_](#)"
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
describe "hyperlinks" do
|
108
|
+
it "should support hyperlinks" do
|
109
|
+
remark("<p>Click <a href='http://mislav.uniqpath.com'>here</a></p>").should ==
|
110
|
+
"Click [here](http://mislav.uniqpath.com)"
|
111
|
+
remark("<a href='/foo' title='bar'>baz</a>").should == '[baz](/foo "bar")'
|
112
|
+
end
|
113
|
+
|
114
|
+
it "should have reference-style hyperlinks" do
|
115
|
+
remark("<p>Click <a href='foo' title='mooslav'>here</a> and <a href='bar'>there</a></p>", :reference_links => true).should ==
|
116
|
+
"Click [here][1] and [there][2]\n\n\n[1]: foo \"mooslav\"\n[2]: bar"
|
117
|
+
remark("<p>Click <a href='foo'>here</a> and <a href='foo'>there</a></p>", :reference_links => true).should ==
|
118
|
+
"Click [here][1] and [there][1]\n\n\n[1]: foo"
|
119
|
+
remark("", :reference_links => true).should == ""
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
it "should support ignores" do
|
124
|
+
remark("<p>Foo <span>bar</span> baz</p>", :ignores => ['span']).should == "Foo baz"
|
125
|
+
end
|
126
|
+
|
127
|
+
describe "scoping" do
|
128
|
+
before do
|
129
|
+
@html = <<-HTML
|
130
|
+
<html>
|
131
|
+
<body>
|
132
|
+
<div id="div1">
|
133
|
+
<p>Only 1 paragraph</p>
|
134
|
+
</div>
|
135
|
+
<div id="div3">
|
136
|
+
<p>Wow, 3 paragraphs</p>
|
137
|
+
<p>This must be where the content is</p>
|
138
|
+
<p>I'm sure</p>
|
139
|
+
</div>
|
140
|
+
<div id="div2">
|
141
|
+
<p>Only 2 paragraphs</p>
|
142
|
+
<p>How disappointing</p>
|
143
|
+
</div>
|
144
|
+
</body>
|
145
|
+
</html>
|
146
|
+
HTML
|
147
|
+
end
|
148
|
+
|
149
|
+
it "should scope to the most likely element that holds content" do
|
150
|
+
remark(@html).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
|
151
|
+
end
|
152
|
+
|
153
|
+
it "should scope to the explicit scope" do
|
154
|
+
remark(@html, :scope => '#div2').should == "Only 2 paragraphs\n\nHow disappointing"
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
data/spec/sample.html
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
<h1>Remark — HTML to Markdown converter</h1>
|
2
|
+
|
3
|
+
<p>This is a sample document which will get updated as Remark understands more HTML.
|
4
|
+
It reflects what's currently supported.</p>
|
5
|
+
|
6
|
+
<p class="nice">Known block elements are left intact if they have attributes.
|
7
|
+
Markdown doesn't have a syntax for them.</p>
|
8
|
+
|
9
|
+
<table>
|
10
|
+
<tr>
|
11
|
+
<td>Elements that can't be represented in Markdown are left intact.</td>
|
12
|
+
</tr>
|
13
|
+
</table>
|
14
|
+
|
15
|
+
<p>SCRIPT and HEAD tags are swallowed, as browsers don't render them as content.</p>
|
16
|
+
|
17
|
+
<script type="text/javascript">
|
18
|
+
alert("I will not survive")
|
19
|
+
</script>
|
20
|
+
|
21
|
+
<p>Remark supports Markdown syntax for <em>inline</em> markup.
|
22
|
+
<a href="http://github.com/mislav">Hyperlinks</a> and <code>code spans</code> are a must.</p>
|
23
|
+
|
24
|
+
<ul>
|
25
|
+
<li>List items too;</li>
|
26
|
+
<li>ordered or unordered.</li>
|
27
|
+
</ul>
|
28
|
+
|
29
|
+
<ol>
|
30
|
+
<li><p>Paragraphs in list items</p></li>
|
31
|
+
<li><p>Make them have one blank line between them in Markdown</p></li>
|
32
|
+
<li>
|
33
|
+
<p>Some list items even have multiple paragraphs</p>
|
34
|
+
<p>That shouldn't be too hard to do … right?</p>
|
35
|
+
<pre>code blocks too</pre>
|
36
|
+
</li>
|
37
|
+
</ol>
|
38
|
+
|
39
|
+
<p>Remark supports BR elements in paragraphs,<br>
|
40
|
+
although people tend to abuse them.</p>
|
41
|
+
|
42
|
+
<pre><code>And who would forget
|
43
|
+
Preformatted code blocks :)</code></pre>
|
44
|
+
|
45
|
+
<p>Notice how it handles <img src="moo.jpg" alt="images" title="Awesum img"> in a nice way.</p>
|
46
|
+
|
47
|
+
<blockquote>
|
48
|
+
<p>I think</p>
|
49
|
+
<p>therefore I am</p>
|
50
|
+
<blockquote>
|
51
|
+
<p>Nested blockquotes</p>
|
52
|
+
</blockquote>
|
53
|
+
</blockquote>
|
metadata
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: remark
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- "Mislav Marohni\xC4\x87"
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-11-25 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ~>
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.8.2
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.2.9
|
34
|
+
version:
|
35
|
+
description: Remark turns simple HTML documents or content in web pages to Markdown source.
|
36
|
+
email: mislav.marohnic@gmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files: []
|
42
|
+
|
43
|
+
files:
|
44
|
+
- Rakefile
|
45
|
+
- bin/remark
|
46
|
+
- lib/remark/core_ext.rb
|
47
|
+
- lib/remark/hpricot_ext.rb
|
48
|
+
- lib/remark.rb
|
49
|
+
- spec/hpricot_ext_spec.rb
|
50
|
+
- spec/remark_spec.rb
|
51
|
+
- spec/sample.html
|
52
|
+
- README.markdown
|
53
|
+
- LICENSE
|
54
|
+
has_rdoc: false
|
55
|
+
homepage: http://github.com/mislav/remark
|
56
|
+
licenses: []
|
57
|
+
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
|
61
|
+
require_paths:
|
62
|
+
- lib
|
63
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: "0"
|
68
|
+
version:
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: "0"
|
74
|
+
version:
|
75
|
+
requirements: []
|
76
|
+
|
77
|
+
rubyforge_project:
|
78
|
+
rubygems_version: 1.3.5
|
79
|
+
signing_key:
|
80
|
+
specification_version: 3
|
81
|
+
summary: HTML to Markdown converter
|
82
|
+
test_files: []
|
83
|
+
|