mislav-remark 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,14 +1,20 @@
1
+ desc "renders the spec/sample.html to Markdown"
2
+ task :sample do
3
+ system %(ruby -Ilib -rubygems bin/remark spec/sample.html)
4
+ end
5
+
1
6
  desc "generates .gemspec file"
2
7
  task :gemspec do
3
8
  spec = Gem::Specification.new do |gem|
4
9
  gem.name = "remark"
10
+ gem.version = '0.3.0'
11
+
5
12
  gem.summary = "HTML to Markdown converter"
6
13
  gem.email = "mislav.marohnic@gmail.com"
7
14
  gem.homepage = "http://github.com/mislav/remark"
8
15
  gem.authors = ["Mislav Marohnić"]
9
16
  gem.has_rdoc = false
10
17
 
11
- gem.version = '0.2.1'
12
18
  gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files`.split("\n")
13
19
  gem.executables = Dir['bin/*'].map { |f| File.basename(f) }
14
20
  end
data/bin/remark CHANGED
@@ -1,4 +1,22 @@
1
1
  #!/usr/bin/env ruby
2
+ require 'optparse'
2
3
  require 'remark'
3
4
 
4
- puts Remark.new(ARGF.read).to_markdown
5
# Collect the command-line switches into the options hash handed to Remark.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: remark [options] [FILE]"

  # -n turns the :reference_links setting off, so link URLs are rendered inline.
  opts.on("-n", "--inline-links", "Render link URLs inline (instead of reference-style)") do |inline|
    options[:reference_links] = !inline
  end

  opts.on("-s", "--scope EXPR", "Scope to a specific CSS/XPath expression in the HTML document") do |scope|
    options[:scope] = scope
  end

  # -i may be given several times; every expression is appended to :ignores.
  opts.on("-i", "--ignore EXPR", "Ignore elements that match CSS/XPath expression") do |expr|
    (options[:ignores] ||= []) << expr
  end
end.parse!

# parse! removes the recognised switches from ARGV, so ARGF reads whatever
# FILE arguments remain, or standard input when there are none.
puts Remark.new(ARGF.read, options).to_markdown
@@ -1,16 +1,27 @@
1
- require 'hpricot'
1
+ require 'remark/hpricot_ext'
2
2
 
3
3
  class Remark
4
- def initialize(source)
4
+ DEFAULT_OPTIONS = { :reference_links => true }
5
+
6
+ def initialize(source, options = {})
5
7
  @doc = Hpricot(source)
8
+ @options = DEFAULT_OPTIONS.merge options
9
+ @links = []
10
+ @ignored_elements = nil
6
11
  end
7
12
 
8
13
  def to_markdown
9
- remark_block(scope)
14
+ parent = scope
15
+ collect_ignored_elements(parent)
16
+ links = @options[:links] = [] unless inline_links?
17
+ result = parent.to_markdown(@options)
18
+ result + (inline_links? || links.empty?? '' : "\n\n\n" + output_reference_links(links))
10
19
  end
11
20
 
12
21
  def scope
13
- if body = @doc.at('/html/body')
22
+ if scope = @options[:scope]
23
+ @doc.at(scope)
24
+ elsif body = @doc.at('/html/body')
14
25
  candidates = (body / 'p').inject(Hash.new(0)) do |memo, para|
15
26
  memo[para.parent] += 1
16
27
  memo
@@ -22,135 +33,29 @@ class Remark
22
33
  end
23
34
  end
24
35
 
25
- IGNORE = %w(script head style)
26
- BLOCK = %w(p blockquote h1 h2 h3 h4 h5 h6 pre)
27
-
28
- private
29
-
30
- def valid_attributes?(elem)
31
- case elem.name
32
- when 'a'
33
- (elem.attributes.keys - %w(title)) == %w(href)
34
- when 'img'
35
- (elem.attributes.keys - %w(title)).sort == %w(alt src)
36
- else
37
- elem.attributes.empty?
38
- end
39
- end
40
-
41
- def remark_block(elem)
42
- remark_children(elem).
43
- reject { |item| item.blank? }.
44
- join("\n\n")
36
# True when links are rendered inline, i.e. when the :reference_links
# option is nil or false.
def inline_links?
  reference_style = @options[:reference_links]
  reference_style ? false : true
end
46
39
 
47
- def remark_children(node)
48
- remarked = []
49
- node.children.each do |item|
50
- result = remark_item(item)
51
- remarked << result if result
52
- end
53
- remarked
54
- end
55
-
56
- def remark_item(item)
57
- if item.text?
58
- item.to_s.gsub(/\n+/, ' ') unless item.blank?
59
- elsif item.elem?
60
- if IGNORE.include?(item.name)
61
- nil
62
- elsif valid_attributes?(item)
63
- remark_element(item)
64
- else
65
- item
66
- end
67
- end
68
- end
69
-
70
- def remark_element(elem)
71
- case elem.name
72
- when 'p'
73
- remark_inline(elem)
74
- when /^h([1-6])$/
75
- ('#' * $1.to_i) + ' ' + remark_inline(elem)
76
- when 'ul', 'ol'
77
- remark_list(elem)
78
- when 'li'
79
- elem.children.any? { |e| e.elem? and BLOCK.include?(e.name) } ?
80
- remark_block(elem).indent : remark_inline(elem)
81
- when 'pre'
82
- elem.inner_text.indent
83
- when 'em'
84
- "_#{elem.inner_text}_"
85
- when 'strong'
86
- "**#{elem.inner_text}**"
87
- when 'code'
88
- code = elem.inner_text
89
- code.index('`') ? "`` #{code} ``" : "`#{code}`"
90
- when 'a'
91
- remark_link(elem.inner_html, elem.attributes['href'], elem.attributes['title'])
92
- when 'img'
93
- '!' + remark_link(elem.attributes['alt'], elem.attributes['src'], elem.attributes['title'])
94
- when 'blockquote'
95
- remark_children(elem).join("\n\n").indent('> ')
96
- when 'br'
97
- " \n" + elem.inner_html
98
- else
99
- elem
40
# Builds the reference-link footer: one "[n]: href" line per collected link,
# numbered from 1, with the title (when present) appended in quoted form.
def output_reference_links(links)
  definitions = (0...links.length).map do |position|
    href, title = links[position]
    suffix = title ? ' ' + title.inspect : ''
    "[#{position + 1}]: #{href}#{suffix}"
  end
  definitions.join("\n")
end
102
47
 
103
- def remark_link(text, href, title = nil)
104
- title_markup = title ? %( "#{title}") : ''
105
- "[#{text}](#{href}#{title_markup})"
106
- end
48
+ private
107
49
 
108
- def remark_inline(elem)
109
- remark_children(elem).join('').strip.gsub(/ {2,}(?!\n)/, ' ').gsub(/(\n) +/, '\1')
50
# True when elem must be left out of the output: either its tag name is listed
# in IGNORE, or it is one of the elements matched by the :ignores expressions.
#
# collect_ignored_elements stores the matched elements under
# @options[:ignored_elements], while @ignored_elements is only ever set to nil
# in the visible code, so both locations are consulted here.
def ignore_element?(elem)
  return true if IGNORE.include?(elem.name)

  ignored = @ignored_elements || @options[:ignored_elements]
  ignored ? ignored.include?(elem) : false
end
111
53
 
112
- def remark_list(list)
113
- unordered = list.name == 'ul'
114
- marker = unordered ? '*' : 0
115
- nested = false
116
-
117
- items = remark_children(list).map do |item|
118
- current = unordered ? marker : "#{marker += 1}."
119
- if item =~ /\A\s/
120
- nested = true
121
- item[0, current.length] = current
122
- item
123
- else
124
- current + ' ' + item
125
- end
54
+ def collect_ignored_elements(scope)
55
+ if @options[:ignores]
56
+ @options[:ignored_elements] = @options[:ignores].map do |expr|
57
+ scope.search(expr).to_a
58
+ end.flatten.uniq
126
59
  end
127
-
128
- items.join("\n" * (nested ? 2 : 1))
129
60
  end
130
61
  end
131
-
132
- Object.class_eval do
133
- def blank?() false end
134
- end
135
-
136
- NilClass.class_eval do
137
- def blank?() true end
138
- end
139
-
140
- String.class_eval do
141
- def blank?
142
- self.empty? or !!(self =~ /\A\s+\Z/)
143
- end
144
-
145
- def squish
146
- self.strip.gsub!(/\s+/, ' ')
147
- end
148
-
149
- def indent(with = ' ' * 4)
150
- self.gsub(/^/, with)
151
- end
152
- end
153
-
154
- Hpricot::Text.class_eval do
155
- def blank?() to_s.blank? end
156
- end
@@ -0,0 +1,25 @@
1
# Baseline for blank?: an arbitrary object is never considered blank.
class Object
  def blank?
    false
  end
end
4
+
5
# nil always counts as blank.
class NilClass
  def blank?
    true
  end
end
8
+
9
# String helpers used while rendering Markdown.
class String
  # True for the empty string and for strings made up of whitespace only.
  def blank?
    empty? || !(self =~ /\A\s+\Z/).nil?
  end

  # Turns newlines and tabs into spaces, then collapses runs of spaces.
  def squeeze_whitespace
    spaced = tr("\n\t", ' ')
    spaced.squeeze(' ')
  end

  # Prefixes every line with the given string (four spaces by default).
  def indent(with = ' ' * 4)
    gsub(/^/, with)
  end
end
22
+
23
# A text node is blank exactly when its string form is blank.
Hpricot::Text.module_eval do
  def blank?
    text = to_s
    text.blank?
  end
end
@@ -0,0 +1,196 @@
1
+ require 'hpricot'
2
+ require 'remark/core_ext'
3
+
4
+ # this applies the default behavior to virtually all Hpricot classes
5
Hpricot::Node.module_eval do
  # Default rendering: a node contributes nothing to the Markdown output.
  def to_markdown(options = {})
    nil
  end

  # Default classification: a node is not a Markdown block.
  def markdown_block?
    false
  end
end
9
+
10
+ # nothing special to process on Text or CData
11
Hpricot::Text.module_eval do
  # Emits the node's string form with its whitespace normalised.
  def to_markdown(options = {})
    text = to_s
    text.squeeze_whitespace
  end
end
14
+
15
Hpricot::CData.module_eval do
  # Emits the section's string form with its whitespace normalised.
  def to_markdown(options = {})
    text = to_s
    text.squeeze_whitespace
  end
end
18
+
19
+ # elements that have children
20
+ Hpricot::Container.module_eval do
21
+ def to_markdown(options = {})
22
+ return '' unless self.children
23
+ previous_was_block = false
24
+ parent_is_block = self.markdown_block?
25
+
26
+ # recurse over this element's children
27
+ content = self.children.inject([]) do |all, child|
28
+ current_is_block = child.markdown_block?
29
+ child_content = child.to_markdown(options)
30
+
31
+ # skip this node if its markdown is nil, empty or, in case
32
+ # that the previous element was a block, all-whitespace
33
+ unless child_content.nil? or child_content.empty? or (previous_was_block and child_content.blank?)
34
+ # handle separating of adjacent markdown blocks with an empty line
35
+ if not all.empty? and current_is_block or previous_was_block
36
+ # strip trailing whitespace if we're opening a new block
37
+ all.last.blank?? all.pop : all.last.rstrip!
38
+ # guard against adding a newline at the beginning
39
+ all << "\n\n" if all.any?
40
+ end
41
+
42
+ unless 'pre' == child.name
43
+ # strip whitespace from the left if ...
44
+ child_content.lstrip! if previous_was_block or # we're adjacent to a block
45
+ (parent_is_block and child == self.children.first) or # this is the first child
46
+ (not all.empty? and all.last =~ / ( \n)?$/) # we're following a space or a forced line break token
47
+
48
+
49
+ # strip whitespace from the right if this is the last node in a block
50
+ child_content.rstrip! if parent_is_block and self.children.last == child
51
+ end
52
+
53
+ all << child_content
54
+ end
55
+
56
+ previous_was_block = current_is_block
57
+ all
58
+ end
59
+
60
+ result = content.join('')
61
+ return result
62
+ end
63
+ end
64
+
65
+ # elements without children
66
Hpricot::Leaf.module_eval do
  # Childless elements render as their whitespace-normalised inner text;
  # any other kind of leaf renders as nil.
  def to_markdown(options = {})
    return nil unless elem?

    inner_text.squeeze_whitespace
  end
end
71
+
72
# Markdown rendering for HTML elements.
#
# NOTE(review): constants assigned inside a block are resolved lexically, so
# the constants below appear to be defined in the enclosing scope rather than
# on Hpricot::Elem itself -- confirm before referring to Hpricot::Elem::IGNORE.
Hpricot::Elem.module_eval do
  # tags that never produce output
  IGNORE = %w(script head style)
  # recognized tags that are still rendered when they have no content
  ALLOWED_EMPTY = %w(img br hr )
  MARKDOWN_BLOCK = %w(p blockquote h1 h2 h3 h4 h5 h6 pre hr)
  MARKDOWN_INLINE = %w(em strong code a img br)
  MARKDOWN_RECOGNIZED = MARKDOWN_BLOCK + MARKDOWN_INLINE + %w(div)
  # tags that markdown_block? reports as blocks
  HTML_BLOCK = MARKDOWN_BLOCK + %w(ul ol dl div noscript form table address fieldset)

  # Renders this element as Markdown.
  #
  # Returns nil for ignored elements, '' for recognized elements without
  # content, and the element's own HTML (to_s) when it carries attributes
  # Markdown cannot express or when its tag has no Markdown counterpart.
  #
  # options - rendering options; the keys read in this block are
  #           :ignored_elements, :reference_links and :links.
  def to_markdown(options = {})
    return nil if markdown_ignored?(options)
    return '' if markdown_empty?
    return to_s unless markdown_supported_attributes?

    case name
    when 'div', 'noscript', 'p'
      # plain containers: the rendered children are the whole output
      super
    when /^h([1-6])$/
      ('#' * $1.to_i) + ' ' + super
    when 'ul', 'ol'
      remark_list(options)
    when 'li'
      content = super
      # children may be nil for an element without content, hence Array()
      content = content.indent if Array(children).any? { |child| child.markdown_block? }
      content
    when 'pre'
      inner_text.rstrip.indent
    when 'em'
      "_#{super}_"
    when 'strong'
      "**#{super}**"
    when 'code'
      code = inner_text
      # a code span that itself contains a backtick needs the double-backtick form
      code.index('`') ? "`` #{code} ``" : "`#{code}`"
    when 'a'
      remark_link(super, attributes['href'], attributes['title'], options)
    when 'img'
      # images are always written inline, never as numbered references
      '!' + remark_link(attributes['alt'], attributes['src'], attributes['title'], :reference_links => false)
    when 'blockquote'
      super.indent('> ')
    when 'br'
      " \n" + inner_html
    else
      to_s
    end
  end

  # Renders the <li> children of a <ul>/<ol> as a Markdown list.
  #
  # Items are separated by one newline, or by an empty line as soon as any
  # item starts with whitespace (i.e. was indented because it holds blocks).
  # List items that render as nil (ignored elements) are left out and do not
  # consume a number in ordered lists.
  def remark_list(options = {})
    unordered = name == 'ul'
    counter = 0
    nested = false

    items = Array(children_of_type('li')).map do |li|
      text = li.to_markdown(options)
      next if text.nil?

      marker = unordered ? '*' : "#{counter += 1}."
      if text =~ /\A\s/
        nested = true
        # overwrite the start of the indentation with the list marker
        text[0, marker.length] = marker
        text
      else
        marker + ' ' + text
      end
    end.compact

    items.join("\n" * (nested ? 2 : 1))
  end

  # True when the tag is listed in HTML_BLOCK.
  def markdown_block?
    HTML_BLOCK.include?(name)
  end

  # True when the tag is listed in MARKDOWN_RECOGNIZED.
  def markdown_recognized?
    MARKDOWN_RECOGNIZED.include?(name)
  end

  protected

  # True when the tag is in IGNORE or the element is listed in
  # options[:ignored_elements].
  def markdown_ignored?(options)
    return true if IGNORE.include?(name)

    ignored = options[:ignored_elements]
    ignored && ignored.include?(self)
  end

  # True for a recognized element that has no content and is not one of the
  # tags allowed to be empty.
  def markdown_empty?
    empty? && markdown_recognized? && !ALLOWED_EMPTY.include?(name)
  end

  # True when the element's attributes can be expressed in Markdown:
  # anything on a div, href (+ title) on links, alt and src (+ title) on
  # images, and no attributes at all elsewhere. Lists additionally require
  # every child element to be an attribute-less <li>.
  def markdown_supported_attributes?
    case name
    when 'div'
      true
    when 'a'
      attribute_names_match?('href', 'title')
    when 'img'
      attribute_names_match?(%w(alt src), 'title')
    when 'ol', 'ul'
      # children may be nil for a list without content, hence Array()
      attributes.empty? && Array(children).all? { |item|
        !item.elem? || (item.name == 'li' && item.attributes.empty?)
      }
    else
      attributes.empty?
    end
  end

  # True when the sorted attribute names, minus the optional ones, are
  # exactly the names given in only (which must be listed in sorted order).
  def attribute_names_match?(only, optional = nil)
    names = attributes.keys.sort
    names -= Array(optional) if optional
    names == Array(only)
  end

  # Formats a link, either inline or as a numbered reference.
  #
  # With options[:reference_links] set, [href, title] pairs are collected in
  # options[:links] (created on demand, so a caller that passes only
  # :reference_links no longer fails on a missing list) and an href that was
  # seen before reuses its existing number.
  def remark_link(text, href, title = nil, options = {})
    if options[:reference_links]
      links = (options[:links] ||= [])
      position = links.map { |known_href, _title| known_href }.index(href)
      unless position
        links << [href, title]
        position = links.length - 1
      end
      "[#{text}][#{position + 1}]"
    else
      title_markup = title ? %( "#{title}") : ''
      "[#{text}](#{href}#{title_markup})"
    end
  end
end
@@ -0,0 +1,138 @@
1
+ require 'remark/hpricot_ext'
2
+
3
+ describe Hpricot, "remark extensions" do
4
+ before(:all) do
5
+ @doc = Hpricot(<<-HTML.strip)
6
+ <?xml version="moo" ?>
7
+ <!DOCTYPE html>
8
+ <html>
9
+ <head>
10
+ <title>Sample document</title>
11
+ </head>
12
+ <body>
13
+ <h1>Sample <strong>Remark</strong> document</h1>
14
+ <p>
15
+ A paragraph with <em>nested</em> <strong>content</strong>
16
+ and <i>Remark</i>-supported elements.
17
+ </p>
18
+
19
+ <a name="content"> </a>
20
+ <h2>The content</h2>
21
+ <div id="content">
22
+ <p>First</p>
23
+ <p>Second</p>
24
+ Some content
25
+ <em>in-between</em>
26
+ <p>Third</p>
27
+ </div>
28
+ <p class="foo">I has classname</p>
29
+
30
+ <div id="empty"></div>
31
+ <blockquote>
32
+ Some famous quote
33
+ <blockquote>Nested famous quote</blockquote>
34
+ </blockquote>
35
+ <div class="code">
36
+ <p>Sample code:</p>
37
+ <pre>def preformatted
38
+ text
39
+ end
40
+ </pre>
41
+ </div>
42
+ <img src='moo.jpg' alt='cow'>
43
+ <img src='moo.jpg' alt='cow' width='16'>
44
+
45
+ <code>simple</code> <code>comp ` lex</code> <code>&lt;tag&gt;</code>
46
+
47
+ <div id="br">
48
+ <p>Foo<br>bar</p>
49
+ <p>Foo<br>
50
+ bar <code>baz</code></p>
51
+ <p>Foo</p><br><br><p>Bar</p><br>
52
+ </div>
53
+
54
+ <ul>
55
+ <li>First</li>
56
+ <li>Second</li>
57
+ </ul>
58
+ <ol>
59
+ <li>First</li>
60
+ <li>Second</li>
61
+ </ol>
62
+ </body>
63
+ </html>
64
+ HTML
65
+ end
66
+
67
+ def remark(elem, options = {})
68
+ (String === elem ? @doc.at(elem) : elem).to_markdown(options)
69
+ end
70
+
71
+ it "should return empty string for empty document" do
72
+ remark(Hpricot('')).should == ''
73
+ end
74
+
75
+ it "should ignore DOCTYPE, HEAD and XML processing instructions" do
76
+ remark('head').should be_nil
77
+ remark(@doc.children[0]).should be_nil # doctype
78
+ remark(@doc.children[2]).should be_nil # xmldecl
79
+ end
80
+
81
+ it "should have whitespace nodes respond to blank" do
82
+ @doc.at('a[@name]').children.first.blank?
83
+ end
84
+
85
+ it "should support headings" do
86
+ remark('h1').should == "# Sample **Remark** document"
87
+ remark('h2').should == "## The content"
88
+ end
89
+
90
+ it "should support paragraphs" do
91
+ remark('p').should == "A paragraph with _nested_ **content** and <i>Remark</i>-supported elements."
92
+ end
93
+
94
+ it "should split paragraphs with an empty line" do
95
+ remark('#content').should == "First\n\nSecond\n\nSome content _in-between_\n\nThird"
96
+ end
97
+
98
+ it "should keep full HTML for paragraphs if they have attributes" do
99
+ remark('p.foo').should == '<p class="foo">I has classname</p>'
100
+ end
101
+
102
+ it "should not break on empty DIV" do
103
+ remark('#empty').should == ""
104
+ end
105
+
106
+ it "should support blockquotes" do
107
+ remark('blockquote > blockquote').should == "> Nested famous quote"
108
+ remark('blockquote').should == "> Some famous quote\n> \n> > Nested famous quote"
109
+ end
110
+
111
+ it "should support preformatted text" do
112
+ remark('div.code').should == "Sample code:\n\n def preformatted\n text\n end"
113
+ end
114
+
115
+ it "should support image tags" do
116
+ remark('img[@alt]').should == '![cow](moo.jpg)'
117
+ remark('img[@width]').should == '<img src="moo.jpg" alt="cow" width="16" />'
118
+ end
119
+
120
+ it "should support code spans" do
121
+ remark('code').should == "`simple`"
122
+ remark('code ~ code').should == "`` comp ` lex ``"
123
+ remark('code ~ code ~ code').should == "`<tag>`"
124
+ end
125
+
126
+ it "should support BR" do
127
+ remark('#br').should == "Foo \nbar\n\nFoo \nbar `baz`\n\nFoo\n\nBar"
128
+ end
129
+
130
+ it "should support unordered list" do
131
+ remark('ul').should == "* First\n* Second"
132
+ end
133
+
134
+ it "should support ordered list" do
135
+ remark('ol').should == "1. First\n2. Second"
136
+ end
137
+ end
138
+
@@ -1,8 +1,9 @@
1
1
  require 'remark'
2
2
 
3
3
  describe Remark do
4
- def remark(source)
5
- described_class.new(source).to_markdown
4
+ def remark(source, options = {})
5
+ options = {:reference_links => false}.merge(options)
6
+ described_class.new(source, options).to_markdown
6
7
  end
7
8
 
8
9
  it "should let through text content" do
@@ -10,17 +11,6 @@ describe Remark do
10
11
  remark("Foo bar\nbaz").should == 'Foo bar baz'
11
12
  end
12
13
 
13
- it "should split paragraphs with an empty line" do
14
- remark("<p>Foo bar</p>").should == 'Foo bar'
15
- remark("<p>Foo bar</p><p>baz").should == "Foo bar\n\nbaz"
16
- remark("<p>Foo bar</p>baz").should == "Foo bar\n\nbaz"
17
- end
18
-
19
- it "should output title syntax" do
20
- remark("<h1>Foo bar</h1>").should == '# Foo bar'
21
- remark("<h2>Foo bar</h2>").should == '## Foo bar'
22
- end
23
-
24
14
  it "should preserve elements in remarked blocks" do
25
15
  remark("<p>Foo <ins>bar</ins></p>").should == 'Foo <ins>bar</ins>'
26
16
  remark("<h2>Foo <ins>bar</ins></h2>").should == '## Foo <ins>bar</ins>'
@@ -31,15 +21,6 @@ describe Remark do
31
21
  remark("<p>If you&#8217;re doing all your development on the &#8220;master&#8221; branch, you&#8217;re not using git").should == "If you’re doing all your development on the “master” branch, you’re not using git"
32
22
  end
33
23
 
34
- it "should ignore tags without user-facing content" do
35
- remark("<script>foo</script>").should == ''
36
- remark("<head>foo</head>").should == ''
37
- end
38
-
39
- it "should leave known elements with attributes intact" do
40
- remark("<p class='notice'>Kittens attack!</p>").should == '<p class="notice">Kittens attack!</p>'
41
- end
42
-
43
24
  it "should leave unknown elements intact" do
44
25
  remark(<<-HTML).should == "Foo\n\n<table>data</table>\n\nBar"
45
26
  <p>Foo</p>
@@ -48,109 +29,129 @@ describe Remark do
48
29
  HTML
49
30
  end
50
31
 
51
- it "should strip excess whitespace" do
52
- remark(<<-HTML).should == "Foo bar"
53
- <p>
54
- Foo
55
- bar
56
- </p>
57
- HTML
58
- end
32
+ describe "whitespace" do
33
+ it "should strip excess whitespace" do
34
+ remark(<<-HTML).should == "Foo bar"
35
+ <p>
36
+ Foo
37
+ bar
38
+ </p>
39
+ HTML
40
+ end
59
41
 
60
- it "should strip whitespace in text nodes between main content" do
61
- pending
62
- remark(<<-HTML).should == "Foo\n\nbar\n\nBaz"
63
- <p>Foo</p>
42
+ it "should strip whitespace in text nodes between processed nodes" do
43
+ remark(<<-HTML).should == "Foo\n\nbar\n\nBaz"
44
+ <p>Foo</p>
64
45
 
65
- bar
66
- <p>Baz</p>
67
- HTML
68
- end
69
-
70
- it "should support lists" do
71
- remark(<<-HTML).should == "* foo\n* bar"
72
- <ul>
73
- <li>foo</li>
74
- <li>bar</li>
75
- </ul>
76
- HTML
46
+ bar
47
+ <p>Baz</p>
48
+ HTML
49
+ end
50
+ end
51
+
52
+ describe "lists" do
53
+ it "should support lists" do
54
+ remark(<<-HTML).should == "* foo\n* bar"
55
+ <ul>
56
+ <li>foo</li>
57
+ <li>bar</li>
58
+ </ul>
59
+ HTML
77
60
 
78
- remark(<<-HTML).should == "1. foo\n2. bar"
79
- <ol>
80
- <li>foo</li>
81
- <li>bar</li>
82
- </ol>
83
- HTML
84
- end
85
-
86
- it "should support lists with nested content" do
87
- remark(<<-HTML).should == "* foo\n \n bar\n\n* baz"
88
- <ul>
89
- <li><p>foo</p><p>bar</p></li>
90
- <li><p>baz</p></li>
91
- </ul>
92
- HTML
61
+ remark(<<-HTML).should == "1. foo\n2. bar"
62
+ <ol>
63
+ <li>foo</li>
64
+ <li>bar</li>
65
+ </ol>
66
+ HTML
67
+ end
68
+
69
+ it "should support lists with nested content" do
70
+ remark(<<-HTML).should == "* foo\n \n bar\n\n* baz"
71
+ <ul>
72
+ <li><p>foo</p><p>bar</p></li>
73
+ <li><p>baz</p></li>
74
+ </ul>
75
+ HTML
76
+ end
77
+
78
+ it "should output malformed lists as HTML" do
79
+ remark(<<-HTML).should == "<ul>\n <span>bar</span>\n </ul>"
80
+ <ul>
81
+ <span>bar</span>
82
+ </ul>
83
+ HTML
84
+ end
93
85
  end
94
86
 
95
87
  it "should support preformatted blocks" do
96
88
  remark("<pre>def foo\n bar\nend</pre>").should == " def foo\n bar\n end"
97
89
  remark("<pre><code>def foo\n &lt;bar&gt;\nend</code></pre>").should == " def foo\n <bar>\n end"
98
- end
99
-
100
- it "should remark inline elements" do
101
- remark("<p>I'm so <strong>strong</strong></p>").should == "I'm so **strong**"
102
- remark("<p>I'm so <em>emo</em></p>").should == "I'm so _emo_"
103
- remark("<ul><li><em>Inline</em> stuff in <strong>lists</strong></li></ul>").should == "* _Inline_ stuff in **lists**"
104
- remark("<h1>Headings <em>too</em></h1>").should == '# Headings _too_'
105
- end
106
-
107
- it "should remark inline code" do
108
- remark("<p>Write more <code>code</code></p>").should == "Write more `code`"
109
- remark("<p>Even with <code>`backticks`</code></p>").should == "Even with `` `backticks` ``"
110
- remark("<p>Or HTML <code>&lt;tags&gt;</code></p>").should == "Or HTML `<tags>`"
111
- end
112
-
113
- it "should support hyperlinks" do
114
- remark("<p>Click <a href='http://mislav.uniqpath.com'>here</a></p>").should ==
115
- "Click [here](http://mislav.uniqpath.com)"
116
- remark("<a href='/foo' title='bar'>baz</a>").should == '[baz](/foo "bar")'
117
- end
118
-
119
- it "should support blockquotes" do
120
- remark("<blockquote>Cogito, ergo sum</blockquote>").should == '> Cogito, ergo sum'
121
- remark("<blockquote><p>I think</p><p>therefore I am</p></blockquote>").should == "> I think\n> \n> therefore I am"
122
- end
123
-
124
- it "should support image tags" do
125
- remark("<img src='moo.jpg' alt='cow'>").should == '![cow](moo.jpg)'
126
- remark("<img src='moo.jpg' alt='cow' width='16'>").should == '<img src="moo.jpg" alt="cow" width="16" />'
127
- end
128
-
129
- it "should not have BR ruin all the fun" do
130
- remark("<p>Foo<br>bar</p>").should == "Foo \nbar"
131
- remark("<p>Foo<br>\nbar <code>baz</code></p>").should == "Foo \nbar `baz`"
132
- remark("<p>Foo</p><br><p>Bar</p>").should == "Foo\n\nBar"
133
- end
134
-
135
- it "should scope to the most likely element that holds content" do
136
- remark(<<-HTML).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
137
- <html>
138
- <body>
139
- <div id="div1">
140
- <p>Only 1 paragraph</p>
141
- </div>
142
- <div id="div3">
143
- <p>Wow, 3 paragraphs</p>
144
- <p>This must be where the content is</p>
145
- <p>I'm sure</p>
146
- </div>
147
- <div id="div2">
148
- <p>Only 2 paragraphs</p>
149
- <p>How disappointing</p>
150
- </div>
151
- </body>
152
- </html>
153
- HTML
90
+ remark("<pre>def foo\n</pre>").should == " def foo"
91
+ end
92
+
93
+ describe "inline" do
94
+ it "should remark inline elements" do
95
+ remark("<p>I'm so <strong>strong</strong></p>").should == "I'm so **strong**"
96
+ remark("<p>I'm so <em>emo</em></p>").should == "I'm so _emo_"
97
+ remark("<ul><li><em>Inline</em> stuff in <strong>lists</strong></li></ul>").should == "* _Inline_ stuff in **lists**"
98
+ remark("<h1>Headings <em>too</em></h1>").should == '# Headings _too_'
99
+ end
100
+
101
+ it "should handle nested inline elements" do
102
+ remark("<p>I <strong>love <code>code</code></strong></p>").should == "I **love `code`**"
103
+ remark("<p>I <a href='#'>am <em>fine</em></a></p>").should == "I [am _fine_](#)"
104
+ end
105
+ end
106
+
107
+ describe "hyperlinks" do
108
+ it "should support hyperlinks" do
109
+ remark("<p>Click <a href='http://mislav.uniqpath.com'>here</a></p>").should ==
110
+ "Click [here](http://mislav.uniqpath.com)"
111
+ remark("<a href='/foo' title='bar'>baz</a>").should == '[baz](/foo "bar")'
112
+ end
113
+
114
+ it "should have reference-style hyperlinks" do
115
+ remark("<p>Click <a href='foo' title='mooslav'>here</a> and <a href='bar'>there</a></p>", :reference_links => true).should ==
116
+ "Click [here][1] and [there][2]\n\n\n[1]: foo \"mooslav\"\n[2]: bar"
117
+ remark("<p>Click <a href='foo'>here</a> and <a href='foo'>there</a></p>", :reference_links => true).should ==
118
+ "Click [here][1] and [there][1]\n\n\n[1]: foo"
119
+ remark("", :reference_links => true).should == ""
120
+ end
121
+ end
122
+
123
+ it "should support ignores" do
124
+ remark("<p>Foo <span>bar</span> baz</p>", :ignores => ['span']).should == "Foo baz"
125
+ end
126
+
127
+ describe "scoping" do
128
+ before do
129
+ @html = <<-HTML
130
+ <html>
131
+ <body>
132
+ <div id="div1">
133
+ <p>Only 1 paragraph</p>
134
+ </div>
135
+ <div id="div3">
136
+ <p>Wow, 3 paragraphs</p>
137
+ <p>This must be where the content is</p>
138
+ <p>I'm sure</p>
139
+ </div>
140
+ <div id="div2">
141
+ <p>Only 2 paragraphs</p>
142
+ <p>How disappointing</p>
143
+ </div>
144
+ </body>
145
+ </html>
146
+ HTML
147
+ end
148
+
149
+ it "should scope to the most likely element that holds content" do
150
+ remark(@html).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
151
+ end
152
+
153
+ it "should scope to the explicit scope" do
154
+ remark(@html, :scope => '#div2').should == "Only 2 paragraphs\n\nHow disappointing"
155
+ end
154
156
  end
155
157
  end
156
-
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mislav-remark
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Mislav Marohni\xC4\x87"
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-24 00:00:00 -07:00
12
+ date: 2009-07-30 00:00:00 -07:00
13
13
  default_executable: remark
14
14
  dependencies: []
15
15
 
@@ -24,12 +24,16 @@ extra_rdoc_files: []
24
24
  files:
25
25
  - Rakefile
26
26
  - bin/remark
27
+ - lib/remark/core_ext.rb
28
+ - lib/remark/hpricot_ext.rb
27
29
  - lib/remark.rb
30
+ - spec/hpricot_ext_spec.rb
28
31
  - spec/remark_spec.rb
29
32
  - spec/sample.html
30
33
  - README.markdown
31
34
  has_rdoc: false
32
35
  homepage: http://github.com/mislav/remark
36
+ licenses:
33
37
  post_install_message:
34
38
  rdoc_options: []
35
39
 
@@ -50,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
50
54
  requirements: []
51
55
 
52
56
  rubyforge_project:
53
- rubygems_version: 1.2.0
57
+ rubygems_version: 1.3.5
54
58
  signing_key:
55
59
  specification_version: 3
56
60
  summary: HTML to Markdown converter