mislav-remark 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1,14 +1,20 @@
1
+ desc "renders the spec/sample.html to Markdown"
2
+ task :sample do
3
+ system %(ruby -Ilib -rubygems bin/remark spec/sample.html)
4
+ end
5
+
1
6
  desc "generates .gemspec file"
2
7
  task :gemspec do
3
8
  spec = Gem::Specification.new do |gem|
4
9
  gem.name = "remark"
10
+ gem.version = '0.3.0'
11
+
5
12
  gem.summary = "HTML to Markdown converter"
6
13
  gem.email = "mislav.marohnic@gmail.com"
7
14
  gem.homepage = "http://github.com/mislav/remark"
8
15
  gem.authors = ["Mislav Marohnić"]
9
16
  gem.has_rdoc = false
10
17
 
11
- gem.version = '0.2.1'
12
18
  gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files`.split("\n")
13
19
  gem.executables = Dir['bin/*'].map { |f| File.basename(f) }
14
20
  end
data/bin/remark CHANGED
@@ -1,4 +1,22 @@
1
1
  #!/usr/bin/env ruby
2
+ require 'optparse'
2
3
  require 'remark'
3
4
 
4
- puts Remark.new(ARGF.read).to_markdown
5
+ options = {}
6
+ OptionParser.new do |opts|
7
+ opts.banner = "Usage: remark [options] [FILE]"
8
+
9
+ opts.on("-n", "--inline-links", "Render link URLs inline (instead of reference-style)") do |inline|
10
+ options[:reference_links] = !inline
11
+ end
12
+
13
+ opts.on("-s", "--scope EXPR", "Scope to a specific CSS/XPath expression in the HTML document") do |scope|
14
+ options[:scope] = scope
15
+ end
16
+
17
+ opts.on("-i", "--ignore EXPR", "Ignore elements that match CSS/XPath expression") do |expr|
18
+ (options[:ignores] ||= []) << expr
19
+ end
20
+ end.parse!
21
+
22
+ puts Remark.new(ARGF.read, options).to_markdown
@@ -1,16 +1,27 @@
1
- require 'hpricot'
1
+ require 'remark/hpricot_ext'
2
2
 
3
3
  class Remark
4
- def initialize(source)
4
+ DEFAULT_OPTIONS = { :reference_links => true }
5
+
6
+ def initialize(source, options = {})
5
7
  @doc = Hpricot(source)
8
+ @options = DEFAULT_OPTIONS.merge options
9
+ @links = []
10
+ @ignored_elements = nil
6
11
  end
7
12
 
8
13
  def to_markdown
9
- remark_block(scope)
14
+ parent = scope
15
+ collect_ignored_elements(parent)
16
+ links = @options[:links] = [] unless inline_links?
17
+ result = parent.to_markdown(@options)
18
+ result + (inline_links? || links.empty?? '' : "\n\n\n" + output_reference_links(links))
10
19
  end
11
20
 
12
21
  def scope
13
- if body = @doc.at('/html/body')
22
+ if scope = @options[:scope]
23
+ @doc.at(scope)
24
+ elsif body = @doc.at('/html/body')
14
25
  candidates = (body / 'p').inject(Hash.new(0)) do |memo, para|
15
26
  memo[para.parent] += 1
16
27
  memo
@@ -22,135 +33,29 @@ class Remark
22
33
  end
23
34
  end
24
35
 
25
- IGNORE = %w(script head style)
26
- BLOCK = %w(p blockquote h1 h2 h3 h4 h5 h6 pre)
27
-
28
- private
29
-
30
- def valid_attributes?(elem)
31
- case elem.name
32
- when 'a'
33
- (elem.attributes.keys - %w(title)) == %w(href)
34
- when 'img'
35
- (elem.attributes.keys - %w(title)).sort == %w(alt src)
36
- else
37
- elem.attributes.empty?
38
- end
39
- end
40
-
41
- def remark_block(elem)
42
- remark_children(elem).
43
- reject { |item| item.blank? }.
44
- join("\n\n")
36
+ def inline_links?
37
+ !@options[:reference_links]
45
38
  end
46
39
 
47
- def remark_children(node)
48
- remarked = []
49
- node.children.each do |item|
50
- result = remark_item(item)
51
- remarked << result if result
52
- end
53
- remarked
54
- end
55
-
56
- def remark_item(item)
57
- if item.text?
58
- item.to_s.gsub(/\n+/, ' ') unless item.blank?
59
- elsif item.elem?
60
- if IGNORE.include?(item.name)
61
- nil
62
- elsif valid_attributes?(item)
63
- remark_element(item)
64
- else
65
- item
66
- end
67
- end
68
- end
69
-
70
- def remark_element(elem)
71
- case elem.name
72
- when 'p'
73
- remark_inline(elem)
74
- when /^h([1-6])$/
75
- ('#' * $1.to_i) + ' ' + remark_inline(elem)
76
- when 'ul', 'ol'
77
- remark_list(elem)
78
- when 'li'
79
- elem.children.any? { |e| e.elem? and BLOCK.include?(e.name) } ?
80
- remark_block(elem).indent : remark_inline(elem)
81
- when 'pre'
82
- elem.inner_text.indent
83
- when 'em'
84
- "_#{elem.inner_text}_"
85
- when 'strong'
86
- "**#{elem.inner_text}**"
87
- when 'code'
88
- code = elem.inner_text
89
- code.index('`') ? "`` #{code} ``" : "`#{code}`"
90
- when 'a'
91
- remark_link(elem.inner_html, elem.attributes['href'], elem.attributes['title'])
92
- when 'img'
93
- '!' + remark_link(elem.attributes['alt'], elem.attributes['src'], elem.attributes['title'])
94
- when 'blockquote'
95
- remark_children(elem).join("\n\n").indent('> ')
96
- when 'br'
97
- " \n" + elem.inner_html
98
- else
99
- elem
40
+ def output_reference_links(links)
41
+ references = []
42
+ links.each_with_index do |(href, title), i|
43
+ references << "[#{i + 1}]: #{href}#{title ? ' ' + title.inspect : ''}"
100
44
  end
45
+ references.join("\n")
101
46
  end
102
47
 
103
- def remark_link(text, href, title = nil)
104
- title_markup = title ? %( "#{title}") : ''
105
- "[#{text}](#{href}#{title_markup})"
106
- end
48
+ private
107
49
 
108
- def remark_inline(elem)
109
- remark_children(elem).join('').strip.gsub(/ {2,}(?!\n)/, ' ').gsub(/(\n) +/, '\1')
50
+ def ignore_element?(elem)
51
+ IGNORE.include?(elem.name) or (@ignored_elements and @ignored_elements.include?(elem))
110
52
  end
111
53
 
112
- def remark_list(list)
113
- unordered = list.name == 'ul'
114
- marker = unordered ? '*' : 0
115
- nested = false
116
-
117
- items = remark_children(list).map do |item|
118
- current = unordered ? marker : "#{marker += 1}."
119
- if item =~ /\A\s/
120
- nested = true
121
- item[0, current.length] = current
122
- item
123
- else
124
- current + ' ' + item
125
- end
54
+ def collect_ignored_elements(scope)
55
+ if @options[:ignores]
56
+ @options[:ignored_elements] = @options[:ignores].map do |expr|
57
+ scope.search(expr).to_a
58
+ end.flatten.uniq
126
59
  end
127
-
128
- items.join("\n" * (nested ? 2 : 1))
129
60
  end
130
61
  end
131
-
132
- Object.class_eval do
133
- def blank?() false end
134
- end
135
-
136
- NilClass.class_eval do
137
- def blank?() true end
138
- end
139
-
140
- String.class_eval do
141
- def blank?
142
- self.empty? or !!(self =~ /\A\s+\Z/)
143
- end
144
-
145
- def squish
146
- self.strip.gsub!(/\s+/, ' ')
147
- end
148
-
149
- def indent(with = ' ' * 4)
150
- self.gsub(/^/, with)
151
- end
152
- end
153
-
154
- Hpricot::Text.class_eval do
155
- def blank?() to_s.blank? end
156
- end
@@ -0,0 +1,25 @@
1
+ Object.class_eval do
2
+ def blank?() false end
3
+ end
4
+
5
+ NilClass.class_eval do
6
+ def blank?() true end
7
+ end
8
+
9
+ String.class_eval do
10
+ def blank?
11
+ self.empty? or !!(self =~ /\A\s+\Z/)
12
+ end
13
+
14
+ def squeeze_whitespace
15
+ self.tr("\n\t", ' ').squeeze(' ')
16
+ end
17
+
18
+ def indent(with = ' ' * 4)
19
+ self.gsub(/^/, with)
20
+ end
21
+ end
22
+
23
+ Hpricot::Text.module_eval do
24
+ def blank?() to_s.blank? end
25
+ end
@@ -0,0 +1,196 @@
1
+ require 'hpricot'
2
+ require 'remark/core_ext'
3
+
4
+ # this applies the default behavior to virtually all Hpricot classes
5
+ Hpricot::Node.module_eval do
6
+ def to_markdown(options = {}) nil end
7
+ def markdown_block?() false end
8
+ end
9
+
10
+ # nothing special to process on Text or CData
11
+ Hpricot::Text.module_eval do
12
+ def to_markdown(options = {}) to_s.squeeze_whitespace end
13
+ end
14
+
15
+ Hpricot::CData.module_eval do
16
+ def to_markdown(options = {}) to_s.squeeze_whitespace end
17
+ end
18
+
19
+ # elements that have children
20
+ Hpricot::Container.module_eval do
21
+ def to_markdown(options = {})
22
+ return '' unless self.children
23
+ previous_was_block = false
24
+ parent_is_block = self.markdown_block?
25
+
26
+ # recurse over this element's children
27
+ content = self.children.inject([]) do |all, child|
28
+ current_is_block = child.markdown_block?
29
+ child_content = child.to_markdown(options)
30
+
31
+ # skip this node if its markdown is nil or empty, or if the previous
32
+ # element was a block and this node is all-whitespace
33
+ unless child_content.nil? or child_content.empty? or (previous_was_block and child_content.blank?)
34
+ # separate adjacent markdown blocks with an empty line
35
+ if not all.empty? and current_is_block or previous_was_block
36
+ # strip trailing whitespace if we're opening a new block
37
+ all.last.blank?? all.pop : all.last.rstrip!
38
+ # guard against adding a newline at the beginning
39
+ all << "\n\n" if all.any?
40
+ end
41
+
42
+ unless 'pre' == child.name
43
+ # strip whitespace from the left if ...
44
+ child_content.lstrip! if previous_was_block or # we're adjacent to a block
45
+ (parent_is_block and child == self.children.first) or # this is the first child
46
+ (not all.empty? and all.last =~ / ( \n)?$/) # we're following a space or a forced line break token
47
+
48
+
49
+ # strip whitespace from the right if this is the last node in a block
50
+ child_content.rstrip! if parent_is_block and self.children.last == child
51
+ end
52
+
53
+ all << child_content
54
+ end
55
+
56
+ previous_was_block = current_is_block
57
+ all
58
+ end
59
+
60
+ result = content.join('')
61
+ return result
62
+ end
63
+ end
64
+
65
+ # elements without children
66
+ Hpricot::Leaf.module_eval do
67
+ def to_markdown(options = {})
68
+ inner_text.squeeze_whitespace if elem?
69
+ end
70
+ end
71
+
72
+ Hpricot::Elem.module_eval do
73
+ IGNORE = %w(script head style)
74
+ ALLOWED_EMPTY = %w(img br hr )
75
+ MARKDOWN_BLOCK = %w(p blockquote h1 h2 h3 h4 h5 h6 pre hr)
76
+ MARKDOWN_INLINE = %w(em strong code a img br)
77
+ MARKDOWN_RECOGNIZED = MARKDOWN_BLOCK + MARKDOWN_INLINE + %w(div)
78
+ HTML_BLOCK = MARKDOWN_BLOCK + %w(ul ol dl div noscript form table address fieldset)
79
+
80
+ def to_markdown(options = {})
81
+ return nil if markdown_ignored?(options)
82
+ return '' if markdown_empty?
83
+ return to_s unless markdown_supported_attributes?
84
+
85
+ case name
86
+ when 'div', 'noscript'
87
+ super
88
+ when 'p'
89
+ super
90
+ when /^h([1-6])$/
91
+ ('#' * $1.to_i) + ' ' + super
92
+ when 'ul', 'ol'
93
+ remark_list(options)
94
+ when 'li'
95
+ content = super
96
+ content = content.indent if children.any? { |e| e.markdown_block? }
97
+ content
98
+ when 'pre'
99
+ inner_text.rstrip.indent
100
+ when 'em'
101
+ "_#{super}_"
102
+ when 'strong'
103
+ "**#{super}**"
104
+ when 'code'
105
+ code = inner_text
106
+ code.index('`') ? "`` #{code} ``" : "`#{code}`"
107
+ when 'a'
108
+ remark_link(super, attributes['href'], attributes['title'], options)
109
+ when 'img'
110
+ '!' + remark_link(attributes['alt'], attributes['src'], attributes['title'], :reference_links => false)
111
+ when 'blockquote'
112
+ super.indent('> ')
113
+ when 'br'
114
+ " \n" + inner_html
115
+ else
116
+ to_s
117
+ end
118
+ end
119
+
120
+ def remark_list(options = {})
121
+ unordered = self.name == 'ul'
122
+ marker = unordered ? '*' : 0
123
+ nested = false
124
+
125
+ items = self.children_of_type('li').map do |item|
126
+ item = item.to_markdown(options)
127
+ current = unordered ? marker : "#{marker += 1}."
128
+ if item =~ /\A\s/
129
+ nested = true
130
+ item[0, current.length] = current
131
+ item
132
+ else
133
+ current + ' ' + item
134
+ end
135
+ end
136
+
137
+ items.join("\n" * (nested ? 2 : 1))
138
+ end
139
+
140
+ def markdown_block?
141
+ HTML_BLOCK.include?(name)
142
+ end
143
+
144
+ def markdown_recognized?
145
+ MARKDOWN_RECOGNIZED.include?(name)
146
+ end
147
+
148
+ protected
149
+
150
+ def markdown_ignored?(options)
151
+ IGNORE.include?(name) or
152
+ (options[:ignored_elements] and options[:ignored_elements].include?(self))
153
+ end
154
+
155
+ def markdown_empty?
156
+ empty? and markdown_recognized? and not ALLOWED_EMPTY.include?(name)
157
+ end
158
+
159
+ def markdown_supported_attributes?
160
+ case name
161
+ when 'div'
162
+ true
163
+ when 'a'
164
+ attribute_names_match?('href', 'title')
165
+ when 'img'
166
+ attribute_names_match?(%w(alt src), 'title')
167
+ when 'ol', 'ul'
168
+ attributes.empty? and children.all? do |item|
169
+ not item.elem? or (item.name == 'li' and item.attributes.empty?)
170
+ end
171
+ else
172
+ attributes.empty?
173
+ end
174
+ end
175
+
176
+ def attribute_names_match?(only, optional = nil)
177
+ names = attributes.keys.sort
178
+ names -= Array(optional) if optional
179
+ names == Array(only)
180
+ end
181
+
182
+ def remark_link(text, href, title = nil, options = {})
183
+ if options[:reference_links]
184
+ if existing = options[:links].find { |h, t| href == h }
185
+ num = options[:links].index(existing) + 1
186
+ else
187
+ options[:links] << [href, title]
188
+ num = options[:links].length
189
+ end
190
+ "[#{text}][#{num}]"
191
+ else
192
+ title_markup = title ? %( "#{title}") : ''
193
+ "[#{text}](#{href}#{title_markup})"
194
+ end
195
+ end
196
+ end
@@ -0,0 +1,138 @@
1
+ require 'remark/hpricot_ext'
2
+
3
+ describe Hpricot, "remark extensions" do
4
+ before(:all) do
5
+ @doc = Hpricot(<<-HTML.strip)
6
+ <?xml version="moo" ?>
7
+ <!DOCTYPE html>
8
+ <html>
9
+ <head>
10
+ <title>Sample document</title>
11
+ </head>
12
+ <body>
13
+ <h1>Sample <strong>Remark</strong> document</h1>
14
+ <p>
15
+ A paragraph with <em>nested</em> <strong>content</strong>
16
+ and <i>Remark</i>-supported elements.
17
+ </p>
18
+
19
+ <a name="content"> </a>
20
+ <h2>The content</h2>
21
+ <div id="content">
22
+ <p>First</p>
23
+ <p>Second</p>
24
+ Some content
25
+ <em>in-between</em>
26
+ <p>Third</p>
27
+ </div>
28
+ <p class="foo">I has classname</p>
29
+
30
+ <div id="empty"></div>
31
+ <blockquote>
32
+ Some famous quote
33
+ <blockquote>Nested famous quote</blockquote>
34
+ </blockquote>
35
+ <div class="code">
36
+ <p>Sample code:</p>
37
+ <pre>def preformatted
38
+ text
39
+ end
40
+ </pre>
41
+ </div>
42
+ <img src='moo.jpg' alt='cow'>
43
+ <img src='moo.jpg' alt='cow' width='16'>
44
+
45
+ <code>simple</code> <code>comp ` lex</code> <code>&lt;tag&gt;</code>
46
+
47
+ <div id="br">
48
+ <p>Foo<br>bar</p>
49
+ <p>Foo<br>
50
+ bar <code>baz</code></p>
51
+ <p>Foo</p><br><br><p>Bar</p><br>
52
+ </div>
53
+
54
+ <ul>
55
+ <li>First</li>
56
+ <li>Second</li>
57
+ </ul>
58
+ <ol>
59
+ <li>First</li>
60
+ <li>Second</li>
61
+ </ol>
62
+ </body>
63
+ </html>
64
+ HTML
65
+ end
66
+
67
+ def remark(elem, options = {})
68
+ (String === elem ? @doc.at(elem) : elem).to_markdown(options)
69
+ end
70
+
71
+ it "should return empty string for empty document" do
72
+ remark(Hpricot('')).should == ''
73
+ end
74
+
75
+ it "should ignore DOCTYPE, HEAD and XML processing instructions" do
76
+ remark('head').should be_nil
77
+ remark(@doc.children[0]).should be_nil # doctype
78
+ remark(@doc.children[2]).should be_nil # xmldecl
79
+ end
80
+
81
+ it "should have whitespace nodes respond to blank" do
82
+ @doc.at('a[@name]').children.first.blank?
83
+ end
84
+
85
+ it "should support headings" do
86
+ remark('h1').should == "# Sample **Remark** document"
87
+ remark('h2').should == "## The content"
88
+ end
89
+
90
+ it "should support paragraphs" do
91
+ remark('p').should == "A paragraph with _nested_ **content** and <i>Remark</i>-supported elements."
92
+ end
93
+
94
+ it "should split paragraphs with an empty line" do
95
+ remark('#content').should == "First\n\nSecond\n\nSome content _in-between_\n\nThird"
96
+ end
97
+
98
+ it "should keep full HTML for paragraphs if they have attributes" do
99
+ remark('p.foo').should == '<p class="foo">I has classname</p>'
100
+ end
101
+
102
+ it "should not break on empty DIV" do
103
+ remark('#empty').should == ""
104
+ end
105
+
106
+ it "should support blockquotes" do
107
+ remark('blockquote > blockquote').should == "> Nested famous quote"
108
+ remark('blockquote').should == "> Some famous quote\n> \n> > Nested famous quote"
109
+ end
110
+
111
+ it "should support preformatted text" do
112
+ remark('div.code').should == "Sample code:\n\n def preformatted\n text\n end"
113
+ end
114
+
115
+ it "should support image tags" do
116
+ remark('img[@alt]').should == '![cow](moo.jpg)'
117
+ remark('img[@width]').should == '<img src="moo.jpg" alt="cow" width="16" />'
118
+ end
119
+
120
+ it "should support code spans" do
121
+ remark('code').should == "`simple`"
122
+ remark('code ~ code').should == "`` comp ` lex ``"
123
+ remark('code ~ code ~ code').should == "`<tag>`"
124
+ end
125
+
126
+ it "should support BR" do
127
+ remark('#br').should == "Foo \nbar\n\nFoo \nbar `baz`\n\nFoo\n\nBar"
128
+ end
129
+
130
+ it "should support unordered list" do
131
+ remark('ul').should == "* First\n* Second"
132
+ end
133
+
134
+ it "should support ordered list" do
135
+ remark('ol').should == "1. First\n2. Second"
136
+ end
137
+ end
138
+
@@ -1,8 +1,9 @@
1
1
  require 'remark'
2
2
 
3
3
  describe Remark do
4
- def remark(source)
5
- described_class.new(source).to_markdown
4
+ def remark(source, options = {})
5
+ options = {:reference_links => false}.merge(options)
6
+ described_class.new(source, options).to_markdown
6
7
  end
7
8
 
8
9
  it "should let through text content" do
@@ -10,17 +11,6 @@ describe Remark do
10
11
  remark("Foo bar\nbaz").should == 'Foo bar baz'
11
12
  end
12
13
 
13
- it "should split paragraphs with an empty line" do
14
- remark("<p>Foo bar</p>").should == 'Foo bar'
15
- remark("<p>Foo bar</p><p>baz").should == "Foo bar\n\nbaz"
16
- remark("<p>Foo bar</p>baz").should == "Foo bar\n\nbaz"
17
- end
18
-
19
- it "should output title syntax" do
20
- remark("<h1>Foo bar</h1>").should == '# Foo bar'
21
- remark("<h2>Foo bar</h2>").should == '## Foo bar'
22
- end
23
-
24
14
  it "should preserve elements in remarked blocks" do
25
15
  remark("<p>Foo <ins>bar</ins></p>").should == 'Foo <ins>bar</ins>'
26
16
  remark("<h2>Foo <ins>bar</ins></h2>").should == '## Foo <ins>bar</ins>'
@@ -31,15 +21,6 @@ describe Remark do
31
21
  remark("<p>If you&#8217;re doing all your development on the &#8220;master&#8221; branch, you&#8217;re not using git").should == "If you’re doing all your development on the “master” branch, you’re not using git"
32
22
  end
33
23
 
34
- it "should ignore tags without user-facing content" do
35
- remark("<script>foo</script>").should == ''
36
- remark("<head>foo</head>").should == ''
37
- end
38
-
39
- it "should leave known elements with attributes intact" do
40
- remark("<p class='notice'>Kittens attack!</p>").should == '<p class="notice">Kittens attack!</p>'
41
- end
42
-
43
24
  it "should leave unknown elements intact" do
44
25
  remark(<<-HTML).should == "Foo\n\n<table>data</table>\n\nBar"
45
26
  <p>Foo</p>
@@ -48,109 +29,129 @@ describe Remark do
48
29
  HTML
49
30
  end
50
31
 
51
- it "should strip excess whitespace" do
52
- remark(<<-HTML).should == "Foo bar"
53
- <p>
54
- Foo
55
- bar
56
- </p>
57
- HTML
58
- end
32
+ describe "whitespace" do
33
+ it "should strip excess whitespace" do
34
+ remark(<<-HTML).should == "Foo bar"
35
+ <p>
36
+ Foo
37
+ bar
38
+ </p>
39
+ HTML
40
+ end
59
41
 
60
- it "should strip whitespace in text nodes between main content" do
61
- pending
62
- remark(<<-HTML).should == "Foo\n\nbar\n\nBaz"
63
- <p>Foo</p>
42
+ it "should strip whitespace in text nodes between processed nodes" do
43
+ remark(<<-HTML).should == "Foo\n\nbar\n\nBaz"
44
+ <p>Foo</p>
64
45
 
65
- bar
66
- <p>Baz</p>
67
- HTML
68
- end
69
-
70
- it "should support lists" do
71
- remark(<<-HTML).should == "* foo\n* bar"
72
- <ul>
73
- <li>foo</li>
74
- <li>bar</li>
75
- </ul>
76
- HTML
46
+ bar
47
+ <p>Baz</p>
48
+ HTML
49
+ end
50
+ end
51
+
52
+ describe "lists" do
53
+ it "should support lists" do
54
+ remark(<<-HTML).should == "* foo\n* bar"
55
+ <ul>
56
+ <li>foo</li>
57
+ <li>bar</li>
58
+ </ul>
59
+ HTML
77
60
 
78
- remark(<<-HTML).should == "1. foo\n2. bar"
79
- <ol>
80
- <li>foo</li>
81
- <li>bar</li>
82
- </ol>
83
- HTML
84
- end
85
-
86
- it "should support lists with nested content" do
87
- remark(<<-HTML).should == "* foo\n \n bar\n\n* baz"
88
- <ul>
89
- <li><p>foo</p><p>bar</p></li>
90
- <li><p>baz</p></li>
91
- </ul>
92
- HTML
61
+ remark(<<-HTML).should == "1. foo\n2. bar"
62
+ <ol>
63
+ <li>foo</li>
64
+ <li>bar</li>
65
+ </ol>
66
+ HTML
67
+ end
68
+
69
+ it "should support lists with nested content" do
70
+ remark(<<-HTML).should == "* foo\n \n bar\n\n* baz"
71
+ <ul>
72
+ <li><p>foo</p><p>bar</p></li>
73
+ <li><p>baz</p></li>
74
+ </ul>
75
+ HTML
76
+ end
77
+
78
+ it "should output malformed lists as HTML" do
79
+ remark(<<-HTML).should == "<ul>\n <span>bar</span>\n </ul>"
80
+ <ul>
81
+ <span>bar</span>
82
+ </ul>
83
+ HTML
84
+ end
93
85
  end
94
86
 
95
87
  it "should support preformatted blocks" do
96
88
  remark("<pre>def foo\n bar\nend</pre>").should == " def foo\n bar\n end"
97
89
  remark("<pre><code>def foo\n &lt;bar&gt;\nend</code></pre>").should == " def foo\n <bar>\n end"
98
- end
99
-
100
- it "should remark inline elements" do
101
- remark("<p>I'm so <strong>strong</strong></p>").should == "I'm so **strong**"
102
- remark("<p>I'm so <em>emo</em></p>").should == "I'm so _emo_"
103
- remark("<ul><li><em>Inline</em> stuff in <strong>lists</strong></li></ul>").should == "* _Inline_ stuff in **lists**"
104
- remark("<h1>Headings <em>too</em></h1>").should == '# Headings _too_'
105
- end
106
-
107
- it "should remark inline code" do
108
- remark("<p>Write more <code>code</code></p>").should == "Write more `code`"
109
- remark("<p>Even with <code>`backticks`</code></p>").should == "Even with `` `backticks` ``"
110
- remark("<p>Or HTML <code>&lt;tags&gt;</code></p>").should == "Or HTML `<tags>`"
111
- end
112
-
113
- it "should support hyperlinks" do
114
- remark("<p>Click <a href='http://mislav.uniqpath.com'>here</a></p>").should ==
115
- "Click [here](http://mislav.uniqpath.com)"
116
- remark("<a href='/foo' title='bar'>baz</a>").should == '[baz](/foo "bar")'
117
- end
118
-
119
- it "should support blockquotes" do
120
- remark("<blockquote>Cogito, ergo sum</blockquote>").should == '> Cogito, ergo sum'
121
- remark("<blockquote><p>I think</p><p>therefore I am</p></blockquote>").should == "> I think\n> \n> therefore I am"
122
- end
123
-
124
- it "should support image tags" do
125
- remark("<img src='moo.jpg' alt='cow'>").should == '![cow](moo.jpg)'
126
- remark("<img src='moo.jpg' alt='cow' width='16'>").should == '<img src="moo.jpg" alt="cow" width="16" />'
127
- end
128
-
129
- it "should not have BR ruin all the fun" do
130
- remark("<p>Foo<br>bar</p>").should == "Foo \nbar"
131
- remark("<p>Foo<br>\nbar <code>baz</code></p>").should == "Foo \nbar `baz`"
132
- remark("<p>Foo</p><br><p>Bar</p>").should == "Foo\n\nBar"
133
- end
134
-
135
- it "should scope to the most likely element that holds content" do
136
- remark(<<-HTML).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
137
- <html>
138
- <body>
139
- <div id="div1">
140
- <p>Only 1 paragraph</p>
141
- </div>
142
- <div id="div3">
143
- <p>Wow, 3 paragraphs</p>
144
- <p>This must be where the content is</p>
145
- <p>I'm sure</p>
146
- </div>
147
- <div id="div2">
148
- <p>Only 2 paragraphs</p>
149
- <p>How disappointing</p>
150
- </div>
151
- </body>
152
- </html>
153
- HTML
90
+ remark("<pre>def foo\n</pre>").should == " def foo"
91
+ end
92
+
93
+ describe "inline" do
94
+ it "should remark inline elements" do
95
+ remark("<p>I'm so <strong>strong</strong></p>").should == "I'm so **strong**"
96
+ remark("<p>I'm so <em>emo</em></p>").should == "I'm so _emo_"
97
+ remark("<ul><li><em>Inline</em> stuff in <strong>lists</strong></li></ul>").should == "* _Inline_ stuff in **lists**"
98
+ remark("<h1>Headings <em>too</em></h1>").should == '# Headings _too_'
99
+ end
100
+
101
+ it "should handle nested inline elements" do
102
+ remark("<p>I <strong>love <code>code</code></strong></p>").should == "I **love `code`**"
103
+ remark("<p>I <a href='#'>am <em>fine</em></a></p>").should == "I [am _fine_](#)"
104
+ end
105
+ end
106
+
107
+ describe "hyperlinks" do
108
+ it "should support hyperlinks" do
109
+ remark("<p>Click <a href='http://mislav.uniqpath.com'>here</a></p>").should ==
110
+ "Click [here](http://mislav.uniqpath.com)"
111
+ remark("<a href='/foo' title='bar'>baz</a>").should == '[baz](/foo "bar")'
112
+ end
113
+
114
+ it "should have reference-style hyperlinks" do
115
+ remark("<p>Click <a href='foo' title='mooslav'>here</a> and <a href='bar'>there</a></p>", :reference_links => true).should ==
116
+ "Click [here][1] and [there][2]\n\n\n[1]: foo \"mooslav\"\n[2]: bar"
117
+ remark("<p>Click <a href='foo'>here</a> and <a href='foo'>there</a></p>", :reference_links => true).should ==
118
+ "Click [here][1] and [there][1]\n\n\n[1]: foo"
119
+ remark("", :reference_links => true).should == ""
120
+ end
121
+ end
122
+
123
+ it "should support ignores" do
124
+ remark("<p>Foo <span>bar</span> baz</p>", :ignores => ['span']).should == "Foo baz"
125
+ end
126
+
127
+ describe "scoping" do
128
+ before do
129
+ @html = <<-HTML
130
+ <html>
131
+ <body>
132
+ <div id="div1">
133
+ <p>Only 1 paragraph</p>
134
+ </div>
135
+ <div id="div3">
136
+ <p>Wow, 3 paragraphs</p>
137
+ <p>This must be where the content is</p>
138
+ <p>I'm sure</p>
139
+ </div>
140
+ <div id="div2">
141
+ <p>Only 2 paragraphs</p>
142
+ <p>How disappointing</p>
143
+ </div>
144
+ </body>
145
+ </html>
146
+ HTML
147
+ end
148
+
149
+ it "should scope to the most likely element that holds content" do
150
+ remark(@html).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
151
+ end
152
+
153
+ it "should scope to the explicit scope" do
154
+ remark(@html, :scope => '#div2').should == "Only 2 paragraphs\n\nHow disappointing"
155
+ end
154
156
  end
155
157
  end
156
-
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mislav-remark
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Mislav Marohni\xC4\x87"
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-24 00:00:00 -07:00
12
+ date: 2009-07-30 00:00:00 -07:00
13
13
  default_executable: remark
14
14
  dependencies: []
15
15
 
@@ -24,12 +24,16 @@ extra_rdoc_files: []
24
24
  files:
25
25
  - Rakefile
26
26
  - bin/remark
27
+ - lib/remark/core_ext.rb
28
+ - lib/remark/hpricot_ext.rb
27
29
  - lib/remark.rb
30
+ - spec/hpricot_ext_spec.rb
28
31
  - spec/remark_spec.rb
29
32
  - spec/sample.html
30
33
  - README.markdown
31
34
  has_rdoc: false
32
35
  homepage: http://github.com/mislav/remark
36
+ licenses:
33
37
  post_install_message:
34
38
  rdoc_options: []
35
39
 
@@ -50,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
50
54
  requirements: []
51
55
 
52
56
  rubyforge_project:
53
- rubygems_version: 1.2.0
57
+ rubygems_version: 1.3.5
54
58
  signing_key:
55
59
  specification_version: 3
56
60
  summary: HTML to Markdown converter