mislav-remark 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +7 -1
- data/bin/remark +19 -1
- data/lib/remark.rb +30 -125
- data/lib/remark/core_ext.rb +25 -0
- data/lib/remark/hpricot_ext.rb +196 -0
- data/spec/hpricot_ext_spec.rb +138 -0
- data/spec/remark_spec.rb +119 -118
- metadata +7 -3
data/Rakefile
CHANGED
@@ -1,14 +1,20 @@
|
|
1
|
+
desc "renders the spec/sample.html to Markdown"
|
2
|
+
task :sample do
|
3
|
+
system %(ruby -Ilib -rubygems bin/remark spec/sample.html)
|
4
|
+
end
|
5
|
+
|
1
6
|
desc "generates .gemspec file"
|
2
7
|
task :gemspec do
|
3
8
|
spec = Gem::Specification.new do |gem|
|
4
9
|
gem.name = "remark"
|
10
|
+
gem.version = '0.3.0'
|
11
|
+
|
5
12
|
gem.summary = "HTML to Markdown converter"
|
6
13
|
gem.email = "mislav.marohnic@gmail.com"
|
7
14
|
gem.homepage = "http://github.com/mislav/remark"
|
8
15
|
gem.authors = ["Mislav Marohnić"]
|
9
16
|
gem.has_rdoc = false
|
10
17
|
|
11
|
-
gem.version = '0.2.1'
|
12
18
|
gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files`.split("\n")
|
13
19
|
gem.executables = Dir['bin/*'].map { |f| File.basename(f) }
|
14
20
|
end
|
data/bin/remark
CHANGED
@@ -1,4 +1,22 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
2
3
|
require 'remark'
|
3
4
|
|
4
|
-
|
5
|
+
options = {}
|
6
|
+
OptionParser.new do |opts|
|
7
|
+
opts.banner = "Usage: remark [options] [FILE]"
|
8
|
+
|
9
|
+
opts.on("-n", "--inline-links", "Render link URLs inline (instead of reference-style)") do |inline|
|
10
|
+
options[:reference_links] = !inline
|
11
|
+
end
|
12
|
+
|
13
|
+
opts.on("-s", "--scope EXPR", "Scope to a spefic CSS/XPath expression in the HTML document") do |scope|
|
14
|
+
options[:scope] = scope
|
15
|
+
end
|
16
|
+
|
17
|
+
opts.on("-i", "--ignore EXPR", "Ignore elements that match CSS/XPath expression") do |expr|
|
18
|
+
(options[:ignores] ||= []) << expr
|
19
|
+
end
|
20
|
+
end.parse!
|
21
|
+
|
22
|
+
puts Remark.new(ARGF.read, options).to_markdown
|
data/lib/remark.rb
CHANGED
@@ -1,16 +1,27 @@
|
|
1
|
-
require 'hpricot'
|
1
|
+
require 'remark/hpricot_ext'
|
2
2
|
|
3
3
|
class Remark
|
4
|
-
|
4
|
+
DEFAULT_OPTIONS = { :reference_links => true }
|
5
|
+
|
6
|
+
def initialize(source, options = {})
|
5
7
|
@doc = Hpricot(source)
|
8
|
+
@options = DEFAULT_OPTIONS.merge options
|
9
|
+
@links = []
|
10
|
+
@ignored_elements = nil
|
6
11
|
end
|
7
12
|
|
8
13
|
def to_markdown
|
9
|
-
|
14
|
+
parent = scope
|
15
|
+
collect_ignored_elements(parent)
|
16
|
+
links = @options[:links] = [] unless inline_links?
|
17
|
+
result = parent.to_markdown(@options)
|
18
|
+
result + (inline_links? || links.empty?? '' : "\n\n\n" + output_reference_links(links))
|
10
19
|
end
|
11
20
|
|
12
21
|
def scope
|
13
|
-
if body = @doc.at('/html/body')
|
22
|
+
if scope = @options[:scope]
|
23
|
+
@doc.at(scope)
|
24
|
+
elsif body = @doc.at('/html/body')
|
14
25
|
candidates = (body / 'p').inject(Hash.new(0)) do |memo, para|
|
15
26
|
memo[para.parent] += 1
|
16
27
|
memo
|
@@ -22,135 +33,29 @@ class Remark
|
|
22
33
|
end
|
23
34
|
end
|
24
35
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
private
|
29
|
-
|
30
|
-
def valid_attributes?(elem)
|
31
|
-
case elem.name
|
32
|
-
when 'a'
|
33
|
-
(elem.attributes.keys - %w(title)) == %w(href)
|
34
|
-
when 'img'
|
35
|
-
(elem.attributes.keys - %w(title)).sort == %w(alt src)
|
36
|
-
else
|
37
|
-
elem.attributes.empty?
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
def remark_block(elem)
|
42
|
-
remark_children(elem).
|
43
|
-
reject { |item| item.blank? }.
|
44
|
-
join("\n\n")
|
36
|
+
def inline_links?
|
37
|
+
!@options[:reference_links]
|
45
38
|
end
|
46
39
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
remarked << result if result
|
52
|
-
end
|
53
|
-
remarked
|
54
|
-
end
|
55
|
-
|
56
|
-
def remark_item(item)
|
57
|
-
if item.text?
|
58
|
-
item.to_s.gsub(/\n+/, ' ') unless item.blank?
|
59
|
-
elsif item.elem?
|
60
|
-
if IGNORE.include?(item.name)
|
61
|
-
nil
|
62
|
-
elsif valid_attributes?(item)
|
63
|
-
remark_element(item)
|
64
|
-
else
|
65
|
-
item
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def remark_element(elem)
|
71
|
-
case elem.name
|
72
|
-
when 'p'
|
73
|
-
remark_inline(elem)
|
74
|
-
when /^h([1-6])$/
|
75
|
-
('#' * $1.to_i) + ' ' + remark_inline(elem)
|
76
|
-
when 'ul', 'ol'
|
77
|
-
remark_list(elem)
|
78
|
-
when 'li'
|
79
|
-
elem.children.any? { |e| e.elem? and BLOCK.include?(e.name) } ?
|
80
|
-
remark_block(elem).indent : remark_inline(elem)
|
81
|
-
when 'pre'
|
82
|
-
elem.inner_text.indent
|
83
|
-
when 'em'
|
84
|
-
"_#{elem.inner_text}_"
|
85
|
-
when 'strong'
|
86
|
-
"**#{elem.inner_text}**"
|
87
|
-
when 'code'
|
88
|
-
code = elem.inner_text
|
89
|
-
code.index('`') ? "`` #{code} ``" : "`#{code}`"
|
90
|
-
when 'a'
|
91
|
-
remark_link(elem.inner_html, elem.attributes['href'], elem.attributes['title'])
|
92
|
-
when 'img'
|
93
|
-
'!' + remark_link(elem.attributes['alt'], elem.attributes['src'], elem.attributes['title'])
|
94
|
-
when 'blockquote'
|
95
|
-
remark_children(elem).join("\n\n").indent('> ')
|
96
|
-
when 'br'
|
97
|
-
" \n" + elem.inner_html
|
98
|
-
else
|
99
|
-
elem
|
40
|
+
def output_reference_links(links)
|
41
|
+
references = []
|
42
|
+
links.each_with_index do |(href, title), i|
|
43
|
+
references << "[#{i + 1}]: #{href}#{title ? ' ' + title.inspect : ''}"
|
100
44
|
end
|
45
|
+
references.join("\n")
|
101
46
|
end
|
102
47
|
|
103
|
-
|
104
|
-
title_markup = title ? %( "#{title}") : ''
|
105
|
-
"[#{text}](#{href}#{title_markup})"
|
106
|
-
end
|
48
|
+
private
|
107
49
|
|
108
|
-
def
|
109
|
-
|
50
|
+
def ignore_element?(elem)
|
51
|
+
IGNORE.include?(elem.name) or (@ignored_elements and @ignored_elements.include?(elem))
|
110
52
|
end
|
111
53
|
|
112
|
-
def
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
items = remark_children(list).map do |item|
|
118
|
-
current = unordered ? marker : "#{marker += 1}."
|
119
|
-
if item =~ /\A\s/
|
120
|
-
nested = true
|
121
|
-
item[0, current.length] = current
|
122
|
-
item
|
123
|
-
else
|
124
|
-
current + ' ' + item
|
125
|
-
end
|
54
|
+
def collect_ignored_elements(scope)
|
55
|
+
if @options[:ignores]
|
56
|
+
@options[:ignored_elements] = @options[:ignores].map do |expr|
|
57
|
+
scope.search(expr).to_a
|
58
|
+
end.flatten.uniq
|
126
59
|
end
|
127
|
-
|
128
|
-
items.join("\n" * (nested ? 2 : 1))
|
129
60
|
end
|
130
61
|
end
|
131
|
-
|
132
|
-
Object.class_eval do
|
133
|
-
def blank?() false end
|
134
|
-
end
|
135
|
-
|
136
|
-
NilClass.class_eval do
|
137
|
-
def blank?() true end
|
138
|
-
end
|
139
|
-
|
140
|
-
String.class_eval do
|
141
|
-
def blank?
|
142
|
-
self.empty? or !!(self =~ /\A\s+\Z/)
|
143
|
-
end
|
144
|
-
|
145
|
-
def squish
|
146
|
-
self.strip.gsub!(/\s+/, ' ')
|
147
|
-
end
|
148
|
-
|
149
|
-
def indent(with = ' ' * 4)
|
150
|
-
self.gsub(/^/, with)
|
151
|
-
end
|
152
|
-
end
|
153
|
-
|
154
|
-
Hpricot::Text.class_eval do
|
155
|
-
def blank?() to_s.blank? end
|
156
|
-
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Object.class_eval do
|
2
|
+
def blank?() false end
|
3
|
+
end
|
4
|
+
|
5
|
+
NilClass.class_eval do
|
6
|
+
def blank?() true end
|
7
|
+
end
|
8
|
+
|
9
|
+
String.class_eval do
|
10
|
+
def blank?
|
11
|
+
self.empty? or !!(self =~ /\A\s+\Z/)
|
12
|
+
end
|
13
|
+
|
14
|
+
def squeeze_whitespace
|
15
|
+
self.tr("\n\t", ' ').squeeze(' ')
|
16
|
+
end
|
17
|
+
|
18
|
+
def indent(with = ' ' * 4)
|
19
|
+
self.gsub(/^/, with)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
Hpricot::Text.module_eval do
|
24
|
+
def blank?() to_s.blank? end
|
25
|
+
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
require 'remark/core_ext'
|
3
|
+
|
4
|
+
# this applies the default behavior to virtually all Hpricot classes
|
5
|
+
Hpricot::Node.module_eval do
|
6
|
+
def to_markdown(options = {}) nil end
|
7
|
+
def markdown_block?() false end
|
8
|
+
end
|
9
|
+
|
10
|
+
# nothing special to process on Text or CData
|
11
|
+
Hpricot::Text.module_eval do
|
12
|
+
def to_markdown(options = {}) to_s.squeeze_whitespace end
|
13
|
+
end
|
14
|
+
|
15
|
+
Hpricot::CData.module_eval do
|
16
|
+
def to_markdown(options = {}) to_s.squeeze_whitespace end
|
17
|
+
end
|
18
|
+
|
19
|
+
# elements that have children
|
20
|
+
Hpricot::Container.module_eval do
|
21
|
+
def to_markdown(options = {})
|
22
|
+
return '' unless self.children
|
23
|
+
previous_was_block = false
|
24
|
+
parent_is_block = self.markdown_block?
|
25
|
+
|
26
|
+
# recurse over this element's children
|
27
|
+
content = self.children.inject([]) do |all, child|
|
28
|
+
current_is_block = child.markdown_block?
|
29
|
+
child_content = child.to_markdown(options)
|
30
|
+
|
31
|
+
# skip this node if its markdown is nil, empty or, in case
|
32
|
+
# that the previous element was a block, all-whitespace
|
33
|
+
unless child_content.nil? or child_content.empty? or (previous_was_block and child_content.blank?)
|
34
|
+
# handle separating of adjacent markdown blocks with an empty line
|
35
|
+
if not all.empty? and current_is_block or previous_was_block
|
36
|
+
# strip trailing whitespace if we're opening a new block
|
37
|
+
all.last.blank?? all.pop : all.last.rstrip!
|
38
|
+
# guard against adding a newline at the beginning
|
39
|
+
all << "\n\n" if all.any?
|
40
|
+
end
|
41
|
+
|
42
|
+
unless 'pre' == child.name
|
43
|
+
# strip whitespace from the left if ...
|
44
|
+
child_content.lstrip! if previous_was_block or # we're adjacent to a block
|
45
|
+
(parent_is_block and child == self.children.first) or # this is the first child
|
46
|
+
(not all.empty? and all.last =~ / ( \n)?$/) # we're following a space or a forced line break token
|
47
|
+
|
48
|
+
|
49
|
+
# strip whitespace from the right if this is the last node in a block
|
50
|
+
child_content.rstrip! if parent_is_block and self.children.last == child
|
51
|
+
end
|
52
|
+
|
53
|
+
all << child_content
|
54
|
+
end
|
55
|
+
|
56
|
+
previous_was_block = current_is_block
|
57
|
+
all
|
58
|
+
end
|
59
|
+
|
60
|
+
result = content.join('')
|
61
|
+
return result
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# elements without children
|
66
|
+
Hpricot::Leaf.module_eval do
|
67
|
+
def to_markdown(options = {})
|
68
|
+
inner_text.squeeze_whitespace if elem?
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
Hpricot::Elem.module_eval do
|
73
|
+
IGNORE = %w(script head style)
|
74
|
+
ALLOWED_EMPTY = %w(img br hr )
|
75
|
+
MARKDOWN_BLOCK = %w(p blockquote h1 h2 h3 h4 h5 h6 pre hr)
|
76
|
+
MARKDOWN_INLINE = %w(em strong code a img br)
|
77
|
+
MARKDOWN_RECOGNIZED = MARKDOWN_BLOCK + MARKDOWN_INLINE + %w(div)
|
78
|
+
HTML_BLOCK = MARKDOWN_BLOCK + %w(ul ol dl div noscript form table address fieldset)
|
79
|
+
|
80
|
+
def to_markdown(options = {})
|
81
|
+
return nil if markdown_ignored?(options)
|
82
|
+
return '' if markdown_empty?
|
83
|
+
return to_s unless markdown_supported_attributes?
|
84
|
+
|
85
|
+
case name
|
86
|
+
when 'div', 'noscript'
|
87
|
+
super
|
88
|
+
when 'p'
|
89
|
+
super
|
90
|
+
when /^h([1-6])$/
|
91
|
+
('#' * $1.to_i) + ' ' + super
|
92
|
+
when 'ul', 'ol'
|
93
|
+
remark_list(options)
|
94
|
+
when 'li'
|
95
|
+
content = super
|
96
|
+
content = content.indent if children.any? { |e| e.markdown_block? }
|
97
|
+
content
|
98
|
+
when 'pre'
|
99
|
+
inner_text.rstrip.indent
|
100
|
+
when 'em'
|
101
|
+
"_#{super}_"
|
102
|
+
when 'strong'
|
103
|
+
"**#{super}**"
|
104
|
+
when 'code'
|
105
|
+
code = inner_text
|
106
|
+
code.index('`') ? "`` #{code} ``" : "`#{code}`"
|
107
|
+
when 'a'
|
108
|
+
remark_link(super, attributes['href'], attributes['title'], options)
|
109
|
+
when 'img'
|
110
|
+
'!' + remark_link(attributes['alt'], attributes['src'], attributes['title'], :reference_links => false)
|
111
|
+
when 'blockquote'
|
112
|
+
super.indent('> ')
|
113
|
+
when 'br'
|
114
|
+
" \n" + inner_html
|
115
|
+
else
|
116
|
+
to_s
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def remark_list(options = {})
|
121
|
+
unordered = self.name == 'ul'
|
122
|
+
marker = unordered ? '*' : 0
|
123
|
+
nested = false
|
124
|
+
|
125
|
+
items = self.children_of_type('li').map do |item|
|
126
|
+
item = item.to_markdown(options)
|
127
|
+
current = unordered ? marker : "#{marker += 1}."
|
128
|
+
if item =~ /\A\s/
|
129
|
+
nested = true
|
130
|
+
item[0, current.length] = current
|
131
|
+
item
|
132
|
+
else
|
133
|
+
current + ' ' + item
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
items.join("\n" * (nested ? 2 : 1))
|
138
|
+
end
|
139
|
+
|
140
|
+
def markdown_block?
|
141
|
+
HTML_BLOCK.include?(name)
|
142
|
+
end
|
143
|
+
|
144
|
+
def markdown_recognized?
|
145
|
+
MARKDOWN_RECOGNIZED.include?(name)
|
146
|
+
end
|
147
|
+
|
148
|
+
protected
|
149
|
+
|
150
|
+
def markdown_ignored?(options)
|
151
|
+
IGNORE.include?(name) or
|
152
|
+
(options[:ignored_elements] and options[:ignored_elements].include?(self))
|
153
|
+
end
|
154
|
+
|
155
|
+
def markdown_empty?
|
156
|
+
empty? and markdown_recognized? and not ALLOWED_EMPTY.include?(name)
|
157
|
+
end
|
158
|
+
|
159
|
+
def markdown_supported_attributes?
|
160
|
+
case name
|
161
|
+
when 'div'
|
162
|
+
true
|
163
|
+
when 'a'
|
164
|
+
attribute_names_match?('href', 'title')
|
165
|
+
when 'img'
|
166
|
+
attribute_names_match?(%w(alt src), 'title')
|
167
|
+
when 'ol', 'ul'
|
168
|
+
attributes.empty? and children.all? do |item|
|
169
|
+
not item.elem? or (item.name == 'li' and item.attributes.empty?)
|
170
|
+
end
|
171
|
+
else
|
172
|
+
attributes.empty?
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
def attribute_names_match?(only, optional = nil)
|
177
|
+
names = attributes.keys.sort
|
178
|
+
names -= Array(optional) if optional
|
179
|
+
names == Array(only)
|
180
|
+
end
|
181
|
+
|
182
|
+
def remark_link(text, href, title = nil, options = {})
|
183
|
+
if options[:reference_links]
|
184
|
+
if existing = options[:links].find { |h, t| href == h }
|
185
|
+
num = options[:links].index(existing) + 1
|
186
|
+
else
|
187
|
+
options[:links] << [href, title]
|
188
|
+
num = options[:links].length
|
189
|
+
end
|
190
|
+
"[#{text}][#{num}]"
|
191
|
+
else
|
192
|
+
title_markup = title ? %( "#{title}") : ''
|
193
|
+
"[#{text}](#{href}#{title_markup})"
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'remark/hpricot_ext'
|
2
|
+
|
3
|
+
describe Hpricot, "remark extensions" do
|
4
|
+
before(:all) do
|
5
|
+
@doc = Hpricot(<<-HTML.strip)
|
6
|
+
<?xml version="moo" ?>
|
7
|
+
<!DOCTYPE html>
|
8
|
+
<html>
|
9
|
+
<head>
|
10
|
+
<title>Sample document</title>
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
<h1>Sample <strong>Remark</strong> document</h1>
|
14
|
+
<p>
|
15
|
+
A paragraph with <em>nested</em> <strong>content</strong>
|
16
|
+
and <i>Remark</i>-supported elements.
|
17
|
+
</p>
|
18
|
+
|
19
|
+
<a name="content"> </a>
|
20
|
+
<h2>The content</h2>
|
21
|
+
<div id="content">
|
22
|
+
<p>First</p>
|
23
|
+
<p>Second</p>
|
24
|
+
Some content
|
25
|
+
<em>in-between</em>
|
26
|
+
<p>Third</p>
|
27
|
+
</div>
|
28
|
+
<p class="foo">I has classname</p>
|
29
|
+
|
30
|
+
<div id="empty"></div>
|
31
|
+
<blockquote>
|
32
|
+
Some famous quote
|
33
|
+
<blockquote>Nested famous quote</blockquote>
|
34
|
+
</blockquote>
|
35
|
+
<div class="code">
|
36
|
+
<p>Sample code:</p>
|
37
|
+
<pre>def preformatted
|
38
|
+
text
|
39
|
+
end
|
40
|
+
</pre>
|
41
|
+
</div>
|
42
|
+
<img src='moo.jpg' alt='cow'>
|
43
|
+
<img src='moo.jpg' alt='cow' width='16'>
|
44
|
+
|
45
|
+
<code>simple</code> <code>comp ` lex</code> <code><tag></code>
|
46
|
+
|
47
|
+
<div id="br">
|
48
|
+
<p>Foo<br>bar</p>
|
49
|
+
<p>Foo<br>
|
50
|
+
bar <code>baz</code></p>
|
51
|
+
<p>Foo</p><br><br><p>Bar</p><br>
|
52
|
+
</div>
|
53
|
+
|
54
|
+
<ul>
|
55
|
+
<li>First</li>
|
56
|
+
<li>Second</li>
|
57
|
+
</ul>
|
58
|
+
<ol>
|
59
|
+
<li>First</li>
|
60
|
+
<li>Second</li>
|
61
|
+
</ol>
|
62
|
+
</body>
|
63
|
+
</html>
|
64
|
+
HTML
|
65
|
+
end
|
66
|
+
|
67
|
+
def remark(elem, options = {})
|
68
|
+
(String === elem ? @doc.at(elem) : elem).to_markdown(options)
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should return empty string for empty document" do
|
72
|
+
remark(Hpricot('')).should == ''
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should ignore DOCTYPE, HEAD and XML processing instructions" do
|
76
|
+
remark('head').should be_nil
|
77
|
+
remark(@doc.children[0]).should be_nil # doctype
|
78
|
+
remark(@doc.children[2]).should be_nil # xmldecl
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should have whitespace nodes respond to blank" do
|
82
|
+
@doc.at('a[@name]').children.first.blank?
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should support headings" do
|
86
|
+
remark('h1').should == "# Sample **Remark** document"
|
87
|
+
remark('h2').should == "## The content"
|
88
|
+
end
|
89
|
+
|
90
|
+
it "should support paragraphs" do
|
91
|
+
remark('p').should == "A paragraph with _nested_ **content** and <i>Remark</i>-supported elements."
|
92
|
+
end
|
93
|
+
|
94
|
+
it "should split paragraphs with an empty line" do
|
95
|
+
remark('#content').should == "First\n\nSecond\n\nSome content _in-between_\n\nThird"
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should keep full HTML for paragraphs if they have attributes" do
|
99
|
+
remark('p.foo').should == '<p class="foo">I has classname</p>'
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should not break on empty DIV" do
|
103
|
+
remark('#empty').should == ""
|
104
|
+
end
|
105
|
+
|
106
|
+
it "should support blockquotes" do
|
107
|
+
remark('blockquote > blockquote').should == "> Nested famous quote"
|
108
|
+
remark('blockquote').should == "> Some famous quote\n> \n> > Nested famous quote"
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should support preformatted text" do
|
112
|
+
remark('div.code').should == "Sample code:\n\n def preformatted\n text\n end"
|
113
|
+
end
|
114
|
+
|
115
|
+
it "should support image tags" do
|
116
|
+
remark('img[@alt]').should == '![cow](moo.jpg)'
|
117
|
+
remark('img[@width]').should == '<img src="moo.jpg" alt="cow" width="16" />'
|
118
|
+
end
|
119
|
+
|
120
|
+
it "should support code spans" do
|
121
|
+
remark('code').should == "`simple`"
|
122
|
+
remark('code ~ code').should == "`` comp ` lex ``"
|
123
|
+
remark('code ~ code ~ code').should == "`<tag>`"
|
124
|
+
end
|
125
|
+
|
126
|
+
it "should support BR" do
|
127
|
+
remark('#br').should == "Foo \nbar\n\nFoo \nbar `baz`\n\nFoo\n\nBar"
|
128
|
+
end
|
129
|
+
|
130
|
+
it "should support unordered list" do
|
131
|
+
remark('ul').should == "* First\n* Second"
|
132
|
+
end
|
133
|
+
|
134
|
+
it "should support ordered list" do
|
135
|
+
remark('ol').should == "1. First\n2. Second"
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
data/spec/remark_spec.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
require 'remark'
|
2
2
|
|
3
3
|
describe Remark do
|
4
|
-
def remark(source)
|
5
|
-
|
4
|
+
def remark(source, options = {})
|
5
|
+
options = {:reference_links => false}.merge(options)
|
6
|
+
described_class.new(source, options).to_markdown
|
6
7
|
end
|
7
8
|
|
8
9
|
it "should let through text content" do
|
@@ -10,17 +11,6 @@ describe Remark do
|
|
10
11
|
remark("Foo bar\nbaz").should == 'Foo bar baz'
|
11
12
|
end
|
12
13
|
|
13
|
-
it "should split paragraphs with an empty line" do
|
14
|
-
remark("<p>Foo bar</p>").should == 'Foo bar'
|
15
|
-
remark("<p>Foo bar</p><p>baz").should == "Foo bar\n\nbaz"
|
16
|
-
remark("<p>Foo bar</p>baz").should == "Foo bar\n\nbaz"
|
17
|
-
end
|
18
|
-
|
19
|
-
it "should output title syntax" do
|
20
|
-
remark("<h1>Foo bar</h1>").should == '# Foo bar'
|
21
|
-
remark("<h2>Foo bar</h2>").should == '## Foo bar'
|
22
|
-
end
|
23
|
-
|
24
14
|
it "should preserve elements in remarked blocks" do
|
25
15
|
remark("<p>Foo <ins>bar</ins></p>").should == 'Foo <ins>bar</ins>'
|
26
16
|
remark("<h2>Foo <ins>bar</ins></h2>").should == '## Foo <ins>bar</ins>'
|
@@ -31,15 +21,6 @@ describe Remark do
|
|
31
21
|
remark("<p>If you’re doing all your development on the “master” branch, you’re not using git").should == "If you’re doing all your development on the “master” branch, you’re not using git"
|
32
22
|
end
|
33
23
|
|
34
|
-
it "should ignore tags without user-facing content" do
|
35
|
-
remark("<script>foo</script>").should == ''
|
36
|
-
remark("<head>foo</head>").should == ''
|
37
|
-
end
|
38
|
-
|
39
|
-
it "should leave known elements with attributes intact" do
|
40
|
-
remark("<p class='notice'>Kittens attack!</p>").should == '<p class="notice">Kittens attack!</p>'
|
41
|
-
end
|
42
|
-
|
43
24
|
it "should leave unknown elements intact" do
|
44
25
|
remark(<<-HTML).should == "Foo\n\n<table>data</table>\n\nBar"
|
45
26
|
<p>Foo</p>
|
@@ -48,109 +29,129 @@ describe Remark do
|
|
48
29
|
HTML
|
49
30
|
end
|
50
31
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
32
|
+
describe "whitespace" do
|
33
|
+
it "should strip excess whitespace" do
|
34
|
+
remark(<<-HTML).should == "Foo bar"
|
35
|
+
<p>
|
36
|
+
Foo
|
37
|
+
bar
|
38
|
+
</p>
|
39
|
+
HTML
|
40
|
+
end
|
59
41
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
<p>Foo</p>
|
42
|
+
it "should strip whitespace in text nodes between processed nodes" do
|
43
|
+
remark(<<-HTML).should == "Foo\n\nbar\n\nBaz"
|
44
|
+
<p>Foo</p>
|
64
45
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
<
|
75
|
-
|
76
|
-
|
46
|
+
bar
|
47
|
+
<p>Baz</p>
|
48
|
+
HTML
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "lists" do
|
53
|
+
it "should support lists" do
|
54
|
+
remark(<<-HTML).should == "* foo\n* bar"
|
55
|
+
<ul>
|
56
|
+
<li>foo</li>
|
57
|
+
<li>bar</li>
|
58
|
+
</ul>
|
59
|
+
HTML
|
77
60
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
61
|
+
remark(<<-HTML).should == "1. foo\n2. bar"
|
62
|
+
<ol>
|
63
|
+
<li>foo</li>
|
64
|
+
<li>bar</li>
|
65
|
+
</ol>
|
66
|
+
HTML
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should support lists with nested content" do
|
70
|
+
remark(<<-HTML).should == "* foo\n \n bar\n\n* baz"
|
71
|
+
<ul>
|
72
|
+
<li><p>foo</p><p>bar</p></li>
|
73
|
+
<li><p>baz</p></li>
|
74
|
+
</ul>
|
75
|
+
HTML
|
76
|
+
end
|
77
|
+
|
78
|
+
it "should output malformed lists as HTML" do
|
79
|
+
remark(<<-HTML).should == "<ul>\n <span>bar</span>\n </ul>"
|
80
|
+
<ul>
|
81
|
+
<span>bar</span>
|
82
|
+
</ul>
|
83
|
+
HTML
|
84
|
+
end
|
93
85
|
end
|
94
86
|
|
95
87
|
it "should support preformatted blocks" do
|
96
88
|
remark("<pre>def foo\n bar\nend</pre>").should == " def foo\n bar\n end"
|
97
89
|
remark("<pre><code>def foo\n <bar>\nend</code></pre>").should == " def foo\n <bar>\n end"
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
remark("<p>Foo</
|
133
|
-
end
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
<
|
139
|
-
<
|
140
|
-
<
|
141
|
-
|
142
|
-
|
143
|
-
<
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
<
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
90
|
+
remark("<pre>def foo\n</pre>").should == " def foo"
|
91
|
+
end
|
92
|
+
|
93
|
+
describe "inline" do
|
94
|
+
it "should remark inline elements" do
|
95
|
+
remark("<p>I'm so <strong>strong</strong></p>").should == "I'm so **strong**"
|
96
|
+
remark("<p>I'm so <em>emo</em></p>").should == "I'm so _emo_"
|
97
|
+
remark("<ul><li><em>Inline</em> stuff in <strong>lists</strong></li></ul>").should == "* _Inline_ stuff in **lists**"
|
98
|
+
remark("<h1>Headings <em>too</em></h1>").should == '# Headings _too_'
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should handle nested inline elements" do
|
102
|
+
remark("<p>I <strong>love <code>code</code></strong></p>").should == "I **love `code`**"
|
103
|
+
remark("<p>I <a href='#'>am <em>fine</em></a></p>").should == "I [am _fine_](#)"
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
describe "hyperlinks" do
|
108
|
+
it "should support hyperlinks" do
|
109
|
+
remark("<p>Click <a href='http://mislav.uniqpath.com'>here</a></p>").should ==
|
110
|
+
"Click [here](http://mislav.uniqpath.com)"
|
111
|
+
remark("<a href='/foo' title='bar'>baz</a>").should == '[baz](/foo "bar")'
|
112
|
+
end
|
113
|
+
|
114
|
+
it "should have reference-style hyperlinks" do
|
115
|
+
remark("<p>Click <a href='foo' title='mooslav'>here</a> and <a href='bar'>there</a></p>", :reference_links => true).should ==
|
116
|
+
"Click [here][1] and [there][2]\n\n\n[1]: foo \"mooslav\"\n[2]: bar"
|
117
|
+
remark("<p>Click <a href='foo'>here</a> and <a href='foo'>there</a></p>", :reference_links => true).should ==
|
118
|
+
"Click [here][1] and [there][1]\n\n\n[1]: foo"
|
119
|
+
remark("", :reference_links => true).should == ""
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
it "should support ignores" do
|
124
|
+
remark("<p>Foo <span>bar</span> baz</p>", :ignores => ['span']).should == "Foo baz"
|
125
|
+
end
|
126
|
+
|
127
|
+
describe "scoping" do
|
128
|
+
before do
|
129
|
+
@html = <<-HTML
|
130
|
+
<html>
|
131
|
+
<body>
|
132
|
+
<div id="div1">
|
133
|
+
<p>Only 1 paragraph</p>
|
134
|
+
</div>
|
135
|
+
<div id="div3">
|
136
|
+
<p>Wow, 3 paragraphs</p>
|
137
|
+
<p>This must be where the content is</p>
|
138
|
+
<p>I'm sure</p>
|
139
|
+
</div>
|
140
|
+
<div id="div2">
|
141
|
+
<p>Only 2 paragraphs</p>
|
142
|
+
<p>How disappointing</p>
|
143
|
+
</div>
|
144
|
+
</body>
|
145
|
+
</html>
|
146
|
+
HTML
|
147
|
+
end
|
148
|
+
|
149
|
+
it "should scope to the most likely element that holds content" do
|
150
|
+
remark(@html).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
|
151
|
+
end
|
152
|
+
|
153
|
+
it "should scope to the explicit scope" do
|
154
|
+
remark(@html, :scope => '#div2').should == "Only 2 paragraphs\n\nHow disappointing"
|
155
|
+
end
|
154
156
|
end
|
155
157
|
end
|
156
|
-
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mislav-remark
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- "Mislav Marohni\xC4\x87"
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-07-30 00:00:00 -07:00
|
13
13
|
default_executable: remark
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -24,12 +24,16 @@ extra_rdoc_files: []
|
|
24
24
|
files:
|
25
25
|
- Rakefile
|
26
26
|
- bin/remark
|
27
|
+
- lib/remark/core_ext.rb
|
28
|
+
- lib/remark/hpricot_ext.rb
|
27
29
|
- lib/remark.rb
|
30
|
+
- spec/hpricot_ext_spec.rb
|
28
31
|
- spec/remark_spec.rb
|
29
32
|
- spec/sample.html
|
30
33
|
- README.markdown
|
31
34
|
has_rdoc: false
|
32
35
|
homepage: http://github.com/mislav/remark
|
36
|
+
licenses:
|
33
37
|
post_install_message:
|
34
38
|
rdoc_options: []
|
35
39
|
|
@@ -50,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
50
54
|
requirements: []
|
51
55
|
|
52
56
|
rubyforge_project:
|
53
|
-
rubygems_version: 1.
|
57
|
+
rubygems_version: 1.3.5
|
54
58
|
signing_key:
|
55
59
|
specification_version: 3
|
56
60
|
summary: HTML to Markdown converter
|