mislav-remark 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +7 -1
- data/bin/remark +19 -1
- data/lib/remark.rb +30 -125
- data/lib/remark/core_ext.rb +25 -0
- data/lib/remark/hpricot_ext.rb +196 -0
- data/spec/hpricot_ext_spec.rb +138 -0
- data/spec/remark_spec.rb +119 -118
- metadata +7 -3
data/Rakefile
CHANGED
@@ -1,14 +1,20 @@
|
|
1
|
+
desc "renders the spec/sample.html to Markdown"
|
2
|
+
task :sample do
|
3
|
+
system %(ruby -Ilib -rubygems bin/remark spec/sample.html)
|
4
|
+
end
|
5
|
+
|
1
6
|
desc "generates .gemspec file"
|
2
7
|
task :gemspec do
|
3
8
|
spec = Gem::Specification.new do |gem|
|
4
9
|
gem.name = "remark"
|
10
|
+
gem.version = '0.3.0'
|
11
|
+
|
5
12
|
gem.summary = "HTML to Markdown converter"
|
6
13
|
gem.email = "mislav.marohnic@gmail.com"
|
7
14
|
gem.homepage = "http://github.com/mislav/remark"
|
8
15
|
gem.authors = ["Mislav Marohnić"]
|
9
16
|
gem.has_rdoc = false
|
10
17
|
|
11
|
-
gem.version = '0.2.1'
|
12
18
|
gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files`.split("\n")
|
13
19
|
gem.executables = Dir['bin/*'].map { |f| File.basename(f) }
|
14
20
|
end
|
data/bin/remark
CHANGED
@@ -1,4 +1,22 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
require 'optparse'
|
2
3
|
require 'remark'
|
3
4
|
|
4
|
-
|
5
|
+
options = {}
|
6
|
+
OptionParser.new do |opts|
|
7
|
+
opts.banner = "Usage: remark [options] [FILE]"
|
8
|
+
|
9
|
+
opts.on("-n", "--inline-links", "Render link URLs inline (instead of reference-style)") do |inline|
|
10
|
+
options[:reference_links] = !inline
|
11
|
+
end
|
12
|
+
|
13
|
+
opts.on("-s", "--scope EXPR", "Scope to a specific CSS/XPath expression in the HTML document") do |scope|
|
14
|
+
options[:scope] = scope
|
15
|
+
end
|
16
|
+
|
17
|
+
opts.on("-i", "--ignore EXPR", "Ignore elements that match CSS/XPath expression") do |expr|
|
18
|
+
(options[:ignores] ||= []) << expr
|
19
|
+
end
|
20
|
+
end.parse!
|
21
|
+
|
22
|
+
puts Remark.new(ARGF.read, options).to_markdown
|
data/lib/remark.rb
CHANGED
@@ -1,16 +1,27 @@
|
|
1
|
-
require '
|
1
|
+
require 'remark/hpricot_ext'
|
2
2
|
|
3
3
|
class Remark
|
4
|
-
|
4
|
+
DEFAULT_OPTIONS = { :reference_links => true }
|
5
|
+
|
6
|
+
def initialize(source, options = {})
|
5
7
|
@doc = Hpricot(source)
|
8
|
+
@options = DEFAULT_OPTIONS.merge options
|
9
|
+
@links = []
|
10
|
+
@ignored_elements = nil
|
6
11
|
end
|
7
12
|
|
8
13
|
def to_markdown
|
9
|
-
|
14
|
+
parent = scope
|
15
|
+
collect_ignored_elements(parent)
|
16
|
+
links = @options[:links] = [] unless inline_links?
|
17
|
+
result = parent.to_markdown(@options)
|
18
|
+
result + (inline_links? || links.empty?? '' : "\n\n\n" + output_reference_links(links))
|
10
19
|
end
|
11
20
|
|
12
21
|
def scope
|
13
|
-
if
|
22
|
+
if scope = @options[:scope]
|
23
|
+
@doc.at(scope)
|
24
|
+
elsif body = @doc.at('/html/body')
|
14
25
|
candidates = (body / 'p').inject(Hash.new(0)) do |memo, para|
|
15
26
|
memo[para.parent] += 1
|
16
27
|
memo
|
@@ -22,135 +33,29 @@ class Remark
|
|
22
33
|
end
|
23
34
|
end
|
24
35
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
private
|
29
|
-
|
30
|
-
def valid_attributes?(elem)
|
31
|
-
case elem.name
|
32
|
-
when 'a'
|
33
|
-
(elem.attributes.keys - %w(title)) == %w(href)
|
34
|
-
when 'img'
|
35
|
-
(elem.attributes.keys - %w(title)).sort == %w(alt src)
|
36
|
-
else
|
37
|
-
elem.attributes.empty?
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
def remark_block(elem)
|
42
|
-
remark_children(elem).
|
43
|
-
reject { |item| item.blank? }.
|
44
|
-
join("\n\n")
|
36
|
+
def inline_links?
|
37
|
+
!@options[:reference_links]
|
45
38
|
end
|
46
39
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
remarked << result if result
|
52
|
-
end
|
53
|
-
remarked
|
54
|
-
end
|
55
|
-
|
56
|
-
def remark_item(item)
|
57
|
-
if item.text?
|
58
|
-
item.to_s.gsub(/\n+/, ' ') unless item.blank?
|
59
|
-
elsif item.elem?
|
60
|
-
if IGNORE.include?(item.name)
|
61
|
-
nil
|
62
|
-
elsif valid_attributes?(item)
|
63
|
-
remark_element(item)
|
64
|
-
else
|
65
|
-
item
|
66
|
-
end
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def remark_element(elem)
|
71
|
-
case elem.name
|
72
|
-
when 'p'
|
73
|
-
remark_inline(elem)
|
74
|
-
when /^h([1-6])$/
|
75
|
-
('#' * $1.to_i) + ' ' + remark_inline(elem)
|
76
|
-
when 'ul', 'ol'
|
77
|
-
remark_list(elem)
|
78
|
-
when 'li'
|
79
|
-
elem.children.any? { |e| e.elem? and BLOCK.include?(e.name) } ?
|
80
|
-
remark_block(elem).indent : remark_inline(elem)
|
81
|
-
when 'pre'
|
82
|
-
elem.inner_text.indent
|
83
|
-
when 'em'
|
84
|
-
"_#{elem.inner_text}_"
|
85
|
-
when 'strong'
|
86
|
-
"**#{elem.inner_text}**"
|
87
|
-
when 'code'
|
88
|
-
code = elem.inner_text
|
89
|
-
code.index('`') ? "`` #{code} ``" : "`#{code}`"
|
90
|
-
when 'a'
|
91
|
-
remark_link(elem.inner_html, elem.attributes['href'], elem.attributes['title'])
|
92
|
-
when 'img'
|
93
|
-
'!' + remark_link(elem.attributes['alt'], elem.attributes['src'], elem.attributes['title'])
|
94
|
-
when 'blockquote'
|
95
|
-
remark_children(elem).join("\n\n").indent('> ')
|
96
|
-
when 'br'
|
97
|
-
" \n" + elem.inner_html
|
98
|
-
else
|
99
|
-
elem
|
40
|
+
def output_reference_links(links)
|
41
|
+
references = []
|
42
|
+
links.each_with_index do |(href, title), i|
|
43
|
+
references << "[#{i + 1}]: #{href}#{title ? ' ' + title.inspect : ''}"
|
100
44
|
end
|
45
|
+
references.join("\n")
|
101
46
|
end
|
102
47
|
|
103
|
-
|
104
|
-
title_markup = title ? %( "#{title}") : ''
|
105
|
-
"[#{text}](#{href}#{title_markup})"
|
106
|
-
end
|
48
|
+
private
|
107
49
|
|
108
|
-
def
|
109
|
-
|
50
|
+
def ignore_element?(elem)
|
51
|
+
IGNORE.include?(elem.name) or (@ignored_elements and @ignored_elements.include?(elem))
|
110
52
|
end
|
111
53
|
|
112
|
-
def
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
items = remark_children(list).map do |item|
|
118
|
-
current = unordered ? marker : "#{marker += 1}."
|
119
|
-
if item =~ /\A\s/
|
120
|
-
nested = true
|
121
|
-
item[0, current.length] = current
|
122
|
-
item
|
123
|
-
else
|
124
|
-
current + ' ' + item
|
125
|
-
end
|
54
|
+
def collect_ignored_elements(scope)
|
55
|
+
if @options[:ignores]
|
56
|
+
@options[:ignored_elements] = @options[:ignores].map do |expr|
|
57
|
+
scope.search(expr).to_a
|
58
|
+
end.flatten.uniq
|
126
59
|
end
|
127
|
-
|
128
|
-
items.join("\n" * (nested ? 2 : 1))
|
129
60
|
end
|
130
61
|
end
|
131
|
-
|
132
|
-
Object.class_eval do
|
133
|
-
def blank?() false end
|
134
|
-
end
|
135
|
-
|
136
|
-
NilClass.class_eval do
|
137
|
-
def blank?() true end
|
138
|
-
end
|
139
|
-
|
140
|
-
String.class_eval do
|
141
|
-
def blank?
|
142
|
-
self.empty? or !!(self =~ /\A\s+\Z/)
|
143
|
-
end
|
144
|
-
|
145
|
-
def squish
|
146
|
-
self.strip.gsub!(/\s+/, ' ')
|
147
|
-
end
|
148
|
-
|
149
|
-
def indent(with = ' ' * 4)
|
150
|
-
self.gsub(/^/, with)
|
151
|
-
end
|
152
|
-
end
|
153
|
-
|
154
|
-
Hpricot::Text.class_eval do
|
155
|
-
def blank?() to_s.blank? end
|
156
|
-
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Object.class_eval do
|
2
|
+
def blank?() false end
|
3
|
+
end
|
4
|
+
|
5
|
+
NilClass.class_eval do
|
6
|
+
def blank?() true end
|
7
|
+
end
|
8
|
+
|
9
|
+
String.class_eval do
|
10
|
+
def blank?
|
11
|
+
self.empty? or !!(self =~ /\A\s+\Z/)
|
12
|
+
end
|
13
|
+
|
14
|
+
def squeeze_whitespace
|
15
|
+
self.tr("\n\t", ' ').squeeze(' ')
|
16
|
+
end
|
17
|
+
|
18
|
+
def indent(with = ' ' * 4)
|
19
|
+
self.gsub(/^/, with)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
Hpricot::Text.module_eval do
|
24
|
+
def blank?() to_s.blank? end
|
25
|
+
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'hpricot'
|
2
|
+
require 'remark/core_ext'
|
3
|
+
|
4
|
+
# this applies the default behavior to virtually all Hpricot classes
|
5
|
+
Hpricot::Node.module_eval do
|
6
|
+
def to_markdown(options = {}) nil end
|
7
|
+
def markdown_block?() false end
|
8
|
+
end
|
9
|
+
|
10
|
+
# nothing special to process on Text or CData
|
11
|
+
Hpricot::Text.module_eval do
|
12
|
+
def to_markdown(options = {}) to_s.squeeze_whitespace end
|
13
|
+
end
|
14
|
+
|
15
|
+
Hpricot::CData.module_eval do
|
16
|
+
def to_markdown(options = {}) to_s.squeeze_whitespace end
|
17
|
+
end
|
18
|
+
|
19
|
+
# elements that have children
|
20
|
+
Hpricot::Container.module_eval do
|
21
|
+
def to_markdown(options = {})
|
22
|
+
return '' unless self.children
|
23
|
+
previous_was_block = false
|
24
|
+
parent_is_block = self.markdown_block?
|
25
|
+
|
26
|
+
# recurse over this element's children
|
27
|
+
content = self.children.inject([]) do |all, child|
|
28
|
+
current_is_block = child.markdown_block?
|
29
|
+
child_content = child.to_markdown(options)
|
30
|
+
|
31
|
+
# skip this node if its markdown is nil, empty or, in case
|
32
|
+
# that the previous element was a block, all-whitespace
|
33
|
+
unless child_content.nil? or child_content.empty? or (previous_was_block and child_content.blank?)
|
34
|
+
# handle separating of adjacent markdown blocks with an empty line
|
35
|
+
if not all.empty? and current_is_block or previous_was_block
|
36
|
+
# strip trailing whitespace if we're opening a new block
|
37
|
+
all.last.blank?? all.pop : all.last.rstrip!
|
38
|
+
# guard against adding a newline at the beginning
|
39
|
+
all << "\n\n" if all.any?
|
40
|
+
end
|
41
|
+
|
42
|
+
unless 'pre' == child.name
|
43
|
+
# strip whitespace from the left if ...
|
44
|
+
child_content.lstrip! if previous_was_block or # we're adjacent to a block
|
45
|
+
(parent_is_block and child == self.children.first) or # this is the first child
|
46
|
+
(not all.empty? and all.last =~ / ( \n)?$/) # we're following a space or a forced line break token
|
47
|
+
|
48
|
+
|
49
|
+
# strip whitespace from the right if this is the last node in a block
|
50
|
+
child_content.rstrip! if parent_is_block and self.children.last == child
|
51
|
+
end
|
52
|
+
|
53
|
+
all << child_content
|
54
|
+
end
|
55
|
+
|
56
|
+
previous_was_block = current_is_block
|
57
|
+
all
|
58
|
+
end
|
59
|
+
|
60
|
+
result = content.join('')
|
61
|
+
return result
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# elements without children
|
66
|
+
Hpricot::Leaf.module_eval do
|
67
|
+
def to_markdown(options = {})
|
68
|
+
inner_text.squeeze_whitespace if elem?
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
Hpricot::Elem.module_eval do
|
73
|
+
IGNORE = %w(script head style)
|
74
|
+
ALLOWED_EMPTY = %w(img br hr )
|
75
|
+
MARKDOWN_BLOCK = %w(p blockquote h1 h2 h3 h4 h5 h6 pre hr)
|
76
|
+
MARKDOWN_INLINE = %w(em strong code a img br)
|
77
|
+
MARKDOWN_RECOGNIZED = MARKDOWN_BLOCK + MARKDOWN_INLINE + %w(div)
|
78
|
+
HTML_BLOCK = MARKDOWN_BLOCK + %w(ul ol dl div noscript form table address fieldset)
|
79
|
+
|
80
|
+
def to_markdown(options = {})
|
81
|
+
return nil if markdown_ignored?(options)
|
82
|
+
return '' if markdown_empty?
|
83
|
+
return to_s unless markdown_supported_attributes?
|
84
|
+
|
85
|
+
case name
|
86
|
+
when 'div', 'noscript'
|
87
|
+
super
|
88
|
+
when 'p'
|
89
|
+
super
|
90
|
+
when /^h([1-6])$/
|
91
|
+
('#' * $1.to_i) + ' ' + super
|
92
|
+
when 'ul', 'ol'
|
93
|
+
remark_list(options)
|
94
|
+
when 'li'
|
95
|
+
content = super
|
96
|
+
content = content.indent if children.any? { |e| e.markdown_block? }
|
97
|
+
content
|
98
|
+
when 'pre'
|
99
|
+
inner_text.rstrip.indent
|
100
|
+
when 'em'
|
101
|
+
"_#{super}_"
|
102
|
+
when 'strong'
|
103
|
+
"**#{super}**"
|
104
|
+
when 'code'
|
105
|
+
code = inner_text
|
106
|
+
code.index('`') ? "`` #{code} ``" : "`#{code}`"
|
107
|
+
when 'a'
|
108
|
+
remark_link(super, attributes['href'], attributes['title'], options)
|
109
|
+
when 'img'
|
110
|
+
'!' + remark_link(attributes['alt'], attributes['src'], attributes['title'], :reference_links => false)
|
111
|
+
when 'blockquote'
|
112
|
+
super.indent('> ')
|
113
|
+
when 'br'
|
114
|
+
" \n" + inner_html
|
115
|
+
else
|
116
|
+
to_s
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def remark_list(options = {})
|
121
|
+
unordered = self.name == 'ul'
|
122
|
+
marker = unordered ? '*' : 0
|
123
|
+
nested = false
|
124
|
+
|
125
|
+
items = self.children_of_type('li').map do |item|
|
126
|
+
item = item.to_markdown(options)
|
127
|
+
current = unordered ? marker : "#{marker += 1}."
|
128
|
+
if item =~ /\A\s/
|
129
|
+
nested = true
|
130
|
+
item[0, current.length] = current
|
131
|
+
item
|
132
|
+
else
|
133
|
+
current + ' ' + item
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
items.join("\n" * (nested ? 2 : 1))
|
138
|
+
end
|
139
|
+
|
140
|
+
def markdown_block?
|
141
|
+
HTML_BLOCK.include?(name)
|
142
|
+
end
|
143
|
+
|
144
|
+
def markdown_recognized?
|
145
|
+
MARKDOWN_RECOGNIZED.include?(name)
|
146
|
+
end
|
147
|
+
|
148
|
+
protected
|
149
|
+
|
150
|
+
def markdown_ignored?(options)
|
151
|
+
IGNORE.include?(name) or
|
152
|
+
(options[:ignored_elements] and options[:ignored_elements].include?(self))
|
153
|
+
end
|
154
|
+
|
155
|
+
def markdown_empty?
|
156
|
+
empty? and markdown_recognized? and not ALLOWED_EMPTY.include?(name)
|
157
|
+
end
|
158
|
+
|
159
|
+
def markdown_supported_attributes?
|
160
|
+
case name
|
161
|
+
when 'div'
|
162
|
+
true
|
163
|
+
when 'a'
|
164
|
+
attribute_names_match?('href', 'title')
|
165
|
+
when 'img'
|
166
|
+
attribute_names_match?(%w(alt src), 'title')
|
167
|
+
when 'ol', 'ul'
|
168
|
+
attributes.empty? and children.all? do |item|
|
169
|
+
not item.elem? or (item.name == 'li' and item.attributes.empty?)
|
170
|
+
end
|
171
|
+
else
|
172
|
+
attributes.empty?
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
def attribute_names_match?(only, optional = nil)
|
177
|
+
names = attributes.keys.sort
|
178
|
+
names -= Array(optional) if optional
|
179
|
+
names == Array(only)
|
180
|
+
end
|
181
|
+
|
182
|
+
def remark_link(text, href, title = nil, options = {})
|
183
|
+
if options[:reference_links]
|
184
|
+
if existing = options[:links].find { |h, t| href == h }
|
185
|
+
num = options[:links].index(existing) + 1
|
186
|
+
else
|
187
|
+
options[:links] << [href, title]
|
188
|
+
num = options[:links].length
|
189
|
+
end
|
190
|
+
"[#{text}][#{num}]"
|
191
|
+
else
|
192
|
+
title_markup = title ? %( "#{title}") : ''
|
193
|
+
"[#{text}](#{href}#{title_markup})"
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'remark/hpricot_ext'
|
2
|
+
|
3
|
+
describe Hpricot, "remark extensions" do
|
4
|
+
before(:all) do
|
5
|
+
@doc = Hpricot(<<-HTML.strip)
|
6
|
+
<?xml version="moo" ?>
|
7
|
+
<!DOCTYPE html>
|
8
|
+
<html>
|
9
|
+
<head>
|
10
|
+
<title>Sample document</title>
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
<h1>Sample <strong>Remark</strong> document</h1>
|
14
|
+
<p>
|
15
|
+
A paragraph with <em>nested</em> <strong>content</strong>
|
16
|
+
and <i>Remark</i>-supported elements.
|
17
|
+
</p>
|
18
|
+
|
19
|
+
<a name="content"> </a>
|
20
|
+
<h2>The content</h2>
|
21
|
+
<div id="content">
|
22
|
+
<p>First</p>
|
23
|
+
<p>Second</p>
|
24
|
+
Some content
|
25
|
+
<em>in-between</em>
|
26
|
+
<p>Third</p>
|
27
|
+
</div>
|
28
|
+
<p class="foo">I has classname</p>
|
29
|
+
|
30
|
+
<div id="empty"></div>
|
31
|
+
<blockquote>
|
32
|
+
Some famous quote
|
33
|
+
<blockquote>Nested famous quote</blockquote>
|
34
|
+
</blockquote>
|
35
|
+
<div class="code">
|
36
|
+
<p>Sample code:</p>
|
37
|
+
<pre>def preformatted
|
38
|
+
text
|
39
|
+
end
|
40
|
+
</pre>
|
41
|
+
</div>
|
42
|
+
<img src='moo.jpg' alt='cow'>
|
43
|
+
<img src='moo.jpg' alt='cow' width='16'>
|
44
|
+
|
45
|
+
<code>simple</code> <code>comp ` lex</code> <code><tag></code>
|
46
|
+
|
47
|
+
<div id="br">
|
48
|
+
<p>Foo<br>bar</p>
|
49
|
+
<p>Foo<br>
|
50
|
+
bar <code>baz</code></p>
|
51
|
+
<p>Foo</p><br><br><p>Bar</p><br>
|
52
|
+
</div>
|
53
|
+
|
54
|
+
<ul>
|
55
|
+
<li>First</li>
|
56
|
+
<li>Second</li>
|
57
|
+
</ul>
|
58
|
+
<ol>
|
59
|
+
<li>First</li>
|
60
|
+
<li>Second</li>
|
61
|
+
</ol>
|
62
|
+
</body>
|
63
|
+
</html>
|
64
|
+
HTML
|
65
|
+
end
|
66
|
+
|
67
|
+
def remark(elem, options = {})
|
68
|
+
(String === elem ? @doc.at(elem) : elem).to_markdown(options)
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should return empty string for empty document" do
|
72
|
+
remark(Hpricot('')).should == ''
|
73
|
+
end
|
74
|
+
|
75
|
+
it "should ignore DOCTYPE, HEAD and XML processing instructions" do
|
76
|
+
remark('head').should be_nil
|
77
|
+
remark(@doc.children[0]).should be_nil # xmldecl
|
78
|
+
remark(@doc.children[2]).should be_nil # doctype
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should have whitespace nodes respond to blank" do
|
82
|
+
@doc.at('a[@name]').children.first.should be_blank # assert via blank?; a bare predicate call could never fail this example
|
83
|
+
end
|
84
|
+
|
85
|
+
it "should support headings" do
|
86
|
+
remark('h1').should == "# Sample **Remark** document"
|
87
|
+
remark('h2').should == "## The content"
|
88
|
+
end
|
89
|
+
|
90
|
+
it "should support paragraphs" do
|
91
|
+
remark('p').should == "A paragraph with _nested_ **content** and <i>Remark</i>-supported elements."
|
92
|
+
end
|
93
|
+
|
94
|
+
it "should split paragraphs with an empty line" do
|
95
|
+
remark('#content').should == "First\n\nSecond\n\nSome content _in-between_\n\nThird"
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should keep full HTML for paragraphs if they have attributes" do
|
99
|
+
remark('p.foo').should == '<p class="foo">I has classname</p>'
|
100
|
+
end
|
101
|
+
|
102
|
+
it "should not break on empty DIV" do
|
103
|
+
remark('#empty').should == ""
|
104
|
+
end
|
105
|
+
|
106
|
+
it "should support blockquotes" do
|
107
|
+
remark('blockquote > blockquote').should == "> Nested famous quote"
|
108
|
+
remark('blockquote').should == "> Some famous quote\n> \n> > Nested famous quote"
|
109
|
+
end
|
110
|
+
|
111
|
+
it "should support preformatted text" do
|
112
|
+
remark('div.code').should == "Sample code:\n\n def preformatted\n text\n end"
|
113
|
+
end
|
114
|
+
|
115
|
+
it "should support image tags" do
|
116
|
+
remark('img[@alt]').should == ''
|
117
|
+
remark('img[@width]').should == '<img src="moo.jpg" alt="cow" width="16" />'
|
118
|
+
end
|
119
|
+
|
120
|
+
it "should support code spans" do
|
121
|
+
remark('code').should == "`simple`"
|
122
|
+
remark('code ~ code').should == "`` comp ` lex ``"
|
123
|
+
remark('code ~ code ~ code').should == "`<tag>`"
|
124
|
+
end
|
125
|
+
|
126
|
+
it "should support BR" do
|
127
|
+
remark('#br').should == "Foo \nbar\n\nFoo \nbar `baz`\n\nFoo\n\nBar"
|
128
|
+
end
|
129
|
+
|
130
|
+
it "should support unordered list" do
|
131
|
+
remark('ul').should == "* First\n* Second"
|
132
|
+
end
|
133
|
+
|
134
|
+
it "should support ordered list" do
|
135
|
+
remark('ol').should == "1. First\n2. Second"
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
data/spec/remark_spec.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
require 'remark'
|
2
2
|
|
3
3
|
describe Remark do
|
4
|
-
def remark(source)
|
5
|
-
|
4
|
+
def remark(source, options = {})
|
5
|
+
options = {:reference_links => false}.merge(options)
|
6
|
+
described_class.new(source, options).to_markdown
|
6
7
|
end
|
7
8
|
|
8
9
|
it "should let through text content" do
|
@@ -10,17 +11,6 @@ describe Remark do
|
|
10
11
|
remark("Foo bar\nbaz").should == 'Foo bar baz'
|
11
12
|
end
|
12
13
|
|
13
|
-
it "should split paragraphs with an empty line" do
|
14
|
-
remark("<p>Foo bar</p>").should == 'Foo bar'
|
15
|
-
remark("<p>Foo bar</p><p>baz").should == "Foo bar\n\nbaz"
|
16
|
-
remark("<p>Foo bar</p>baz").should == "Foo bar\n\nbaz"
|
17
|
-
end
|
18
|
-
|
19
|
-
it "should output title syntax" do
|
20
|
-
remark("<h1>Foo bar</h1>").should == '# Foo bar'
|
21
|
-
remark("<h2>Foo bar</h2>").should == '## Foo bar'
|
22
|
-
end
|
23
|
-
|
24
14
|
it "should preserve elements in remarked blocks" do
|
25
15
|
remark("<p>Foo <ins>bar</ins></p>").should == 'Foo <ins>bar</ins>'
|
26
16
|
remark("<h2>Foo <ins>bar</ins></h2>").should == '## Foo <ins>bar</ins>'
|
@@ -31,15 +21,6 @@ describe Remark do
|
|
31
21
|
remark("<p>If you’re doing all your development on the “master” branch, you’re not using git").should == "If you’re doing all your development on the “master” branch, you’re not using git"
|
32
22
|
end
|
33
23
|
|
34
|
-
it "should ignore tags without user-facing content" do
|
35
|
-
remark("<script>foo</script>").should == ''
|
36
|
-
remark("<head>foo</head>").should == ''
|
37
|
-
end
|
38
|
-
|
39
|
-
it "should leave known elements with attributes intact" do
|
40
|
-
remark("<p class='notice'>Kittens attack!</p>").should == '<p class="notice">Kittens attack!</p>'
|
41
|
-
end
|
42
|
-
|
43
24
|
it "should leave unknown elements intact" do
|
44
25
|
remark(<<-HTML).should == "Foo\n\n<table>data</table>\n\nBar"
|
45
26
|
<p>Foo</p>
|
@@ -48,109 +29,129 @@ describe Remark do
|
|
48
29
|
HTML
|
49
30
|
end
|
50
31
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
32
|
+
describe "whitespace" do
|
33
|
+
it "should strip excess whitespace" do
|
34
|
+
remark(<<-HTML).should == "Foo bar"
|
35
|
+
<p>
|
36
|
+
Foo
|
37
|
+
bar
|
38
|
+
</p>
|
39
|
+
HTML
|
40
|
+
end
|
59
41
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
<p>Foo</p>
|
42
|
+
it "should strip whitespace in text nodes between processed nodes" do
|
43
|
+
remark(<<-HTML).should == "Foo\n\nbar\n\nBaz"
|
44
|
+
<p>Foo</p>
|
64
45
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
<
|
75
|
-
|
76
|
-
|
46
|
+
bar
|
47
|
+
<p>Baz</p>
|
48
|
+
HTML
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "lists" do
|
53
|
+
it "should support lists" do
|
54
|
+
remark(<<-HTML).should == "* foo\n* bar"
|
55
|
+
<ul>
|
56
|
+
<li>foo</li>
|
57
|
+
<li>bar</li>
|
58
|
+
</ul>
|
59
|
+
HTML
|
77
60
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
61
|
+
remark(<<-HTML).should == "1. foo\n2. bar"
|
62
|
+
<ol>
|
63
|
+
<li>foo</li>
|
64
|
+
<li>bar</li>
|
65
|
+
</ol>
|
66
|
+
HTML
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should support lists with nested content" do
|
70
|
+
remark(<<-HTML).should == "* foo\n \n bar\n\n* baz"
|
71
|
+
<ul>
|
72
|
+
<li><p>foo</p><p>bar</p></li>
|
73
|
+
<li><p>baz</p></li>
|
74
|
+
</ul>
|
75
|
+
HTML
|
76
|
+
end
|
77
|
+
|
78
|
+
it "should output malformed lists as HTML" do
|
79
|
+
remark(<<-HTML).should == "<ul>\n <span>bar</span>\n </ul>"
|
80
|
+
<ul>
|
81
|
+
<span>bar</span>
|
82
|
+
</ul>
|
83
|
+
HTML
|
84
|
+
end
|
93
85
|
end
|
94
86
|
|
95
87
|
it "should support preformatted blocks" do
|
96
88
|
remark("<pre>def foo\n bar\nend</pre>").should == " def foo\n bar\n end"
|
97
89
|
remark("<pre><code>def foo\n <bar>\nend</code></pre>").should == " def foo\n <bar>\n end"
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
remark("<p>Foo</
|
133
|
-
end
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
<
|
139
|
-
<
|
140
|
-
<
|
141
|
-
|
142
|
-
|
143
|
-
<
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
<
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
90
|
+
remark("<pre>def foo\n</pre>").should == " def foo"
|
91
|
+
end
|
92
|
+
|
93
|
+
describe "inline" do
|
94
|
+
it "should remark inline elements" do
|
95
|
+
remark("<p>I'm so <strong>strong</strong></p>").should == "I'm so **strong**"
|
96
|
+
remark("<p>I'm so <em>emo</em></p>").should == "I'm so _emo_"
|
97
|
+
remark("<ul><li><em>Inline</em> stuff in <strong>lists</strong></li></ul>").should == "* _Inline_ stuff in **lists**"
|
98
|
+
remark("<h1>Headings <em>too</em></h1>").should == '# Headings _too_'
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should handle nested inline elements" do
|
102
|
+
remark("<p>I <strong>love <code>code</code></strong></p>").should == "I **love `code`**"
|
103
|
+
remark("<p>I <a href='#'>am <em>fine</em></a></p>").should == "I [am _fine_](#)"
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
describe "hyperlinks" do
|
108
|
+
it "should support hyperlinks" do
|
109
|
+
remark("<p>Click <a href='http://mislav.uniqpath.com'>here</a></p>").should ==
|
110
|
+
"Click [here](http://mislav.uniqpath.com)"
|
111
|
+
remark("<a href='/foo' title='bar'>baz</a>").should == '[baz](/foo "bar")'
|
112
|
+
end
|
113
|
+
|
114
|
+
it "should have reference-style hyperlinks" do
|
115
|
+
remark("<p>Click <a href='foo' title='mooslav'>here</a> and <a href='bar'>there</a></p>", :reference_links => true).should ==
|
116
|
+
"Click [here][1] and [there][2]\n\n\n[1]: foo \"mooslav\"\n[2]: bar"
|
117
|
+
remark("<p>Click <a href='foo'>here</a> and <a href='foo'>there</a></p>", :reference_links => true).should ==
|
118
|
+
"Click [here][1] and [there][1]\n\n\n[1]: foo"
|
119
|
+
remark("", :reference_links => true).should == ""
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
it "should support ignores" do
|
124
|
+
remark("<p>Foo <span>bar</span> baz</p>", :ignores => ['span']).should == "Foo baz"
|
125
|
+
end
|
126
|
+
|
127
|
+
describe "scoping" do
|
128
|
+
before do
|
129
|
+
@html = <<-HTML
|
130
|
+
<html>
|
131
|
+
<body>
|
132
|
+
<div id="div1">
|
133
|
+
<p>Only 1 paragraph</p>
|
134
|
+
</div>
|
135
|
+
<div id="div3">
|
136
|
+
<p>Wow, 3 paragraphs</p>
|
137
|
+
<p>This must be where the content is</p>
|
138
|
+
<p>I'm sure</p>
|
139
|
+
</div>
|
140
|
+
<div id="div2">
|
141
|
+
<p>Only 2 paragraphs</p>
|
142
|
+
<p>How disappointing</p>
|
143
|
+
</div>
|
144
|
+
</body>
|
145
|
+
</html>
|
146
|
+
HTML
|
147
|
+
end
|
148
|
+
|
149
|
+
it "should scope to the most likely element that holds content" do
|
150
|
+
remark(@html).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
|
151
|
+
end
|
152
|
+
|
153
|
+
it "should scope to the explicit scope" do
|
154
|
+
remark(@html, :scope => '#div2').should == "Only 2 paragraphs\n\nHow disappointing"
|
155
|
+
end
|
154
156
|
end
|
155
157
|
end
|
156
|
-
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mislav-remark
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- "Mislav Marohni\xC4\x87"
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-07-30 00:00:00 -07:00
|
13
13
|
default_executable: remark
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -24,12 +24,16 @@ extra_rdoc_files: []
|
|
24
24
|
files:
|
25
25
|
- Rakefile
|
26
26
|
- bin/remark
|
27
|
+
- lib/remark/core_ext.rb
|
28
|
+
- lib/remark/hpricot_ext.rb
|
27
29
|
- lib/remark.rb
|
30
|
+
- spec/hpricot_ext_spec.rb
|
28
31
|
- spec/remark_spec.rb
|
29
32
|
- spec/sample.html
|
30
33
|
- README.markdown
|
31
34
|
has_rdoc: false
|
32
35
|
homepage: http://github.com/mislav/remark
|
36
|
+
licenses:
|
33
37
|
post_install_message:
|
34
38
|
rdoc_options: []
|
35
39
|
|
@@ -50,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
50
54
|
requirements: []
|
51
55
|
|
52
56
|
rubyforge_project:
|
53
|
-
rubygems_version: 1.
|
57
|
+
rubygems_version: 1.3.5
|
54
58
|
signing_key:
|
55
59
|
specification_version: 3
|
56
60
|
summary: HTML to Markdown converter
|