remark 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,18 @@
1
+ Copyright (c) 2009 Mislav Marohnić
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7
+ the Software, and to permit persons to whom the Software is furnished to do so,
8
+ subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ Remark — HTML→Markdown tool
2
+ ===========================
3
+
4
+ <i>Remark</i> parses HTML and delivers proper Markdown.
5
+
6
+ Usage
7
+ -----
8
+
9
+ From the command line:
10
+
11
+ remark path/to/file.html
12
+
13
+ or via STDIN:
14
+
15
+ echo "..." | remark
16
+
17
+ You can try feeding it a document from the web:
18
+
19
+ curl -s daringfireball.net/projects/markdown/basics | remark > result.markdown
20
+
21
+ Then see how well it does.
22
+
23
+ If you've cloned the repository, invoke the binary like this:
24
+
25
+ ruby -Ilib -rubygems bin/remark spec/sample.html
26
+
27
+ And this is how you use it from Ruby code:
28
+
29
+ Remark.new('<h1>My document</h1><p>Some content</p>').to_markdown
@@ -0,0 +1,31 @@
1
+ desc "renders the spec/sample.html to Markdown"
2
+ task :sample do
3
+ system %(ruby -Ilib -rubygems bin/remark spec/sample.html)
4
+ end
5
+
6
+ desc "generates .gemspec file"
7
+ task :gemspec do
8
+ spec = Gem::Specification.new do |gem|
9
+ gem.name = "remark"
10
+ gem.version = '0.3.0'
11
+
12
+ gem.summary = "HTML to Markdown converter"
13
+ gem.email = "mislav.marohnic@gmail.com"
14
+ gem.homepage = "http://github.com/mislav/remark"
15
+ gem.authors = ["Mislav Marohnić"]
16
+ gem.has_rdoc = false
17
+
18
+ gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files`.split("\n")
19
+ gem.executables = Dir['bin/*'].map { |f| File.basename(f) }
20
+ end
21
+
22
+ spec_string = spec.to_ruby
23
+
24
+ begin
25
+ Thread.new { eval("$SAFE = 3\n#{spec_string}", binding) }.join
26
+ rescue
27
+ abort "unsafe gemspec: #{$!}"
28
+ else
29
+ File.open("#{spec.name}.gemspec", 'w') { |file| file.write spec_string }
30
+ end
31
+ end
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env ruby
2
+ require 'optparse'
3
+ require 'remark'
4
+
5
+ options = {}
6
+ OptionParser.new do |opts|
7
+ opts.banner = "Usage: remark [options] [FILE]"
8
+
9
+ opts.on("-n", "--inline-links", "Render link URLs inline (instead of reference-style)") do |inline|
10
+ options[:reference_links] = !inline
11
+ end
12
+
13
+ opts.on("-s", "--scope EXPR", "Scope to a specific CSS/XPath expression in the HTML document") do |scope|
14
+ options[:scope] = scope
15
+ end
16
+
17
+ opts.on("-i", "--ignore EXPR", "Ignore elements that match CSS/XPath expression") do |expr|
18
+ (options[:ignores] ||= []) << expr
19
+ end
20
+ end.parse!
21
+
22
+ puts Remark.new(ARGF.read, options).to_markdown
@@ -0,0 +1,61 @@
1
+ require 'remark/hpricot_ext'
2
+
3
+ class Remark
4
+ DEFAULT_OPTIONS = { :reference_links => true }
5
+
6
+ def initialize(source, options = {})
7
+ @doc = Hpricot(source)
8
+ @options = DEFAULT_OPTIONS.merge options
9
+ @links = []
10
+ @ignored_elements = nil
11
+ end
12
+
13
+ def to_markdown
14
+ parent = scope
15
+ collect_ignored_elements(parent)
16
+ links = @options[:links] = [] unless inline_links?
17
+ result = parent.to_markdown(@options)
18
+ result + (inline_links? || links.empty?? '' : "\n\n\n" + output_reference_links(links))
19
+ end
20
+
21
+ def scope
22
+ if scope = @options[:scope]
23
+ @doc.at(scope)
24
+ elsif body = @doc.at('/html/body')
25
+ candidates = (body / 'p').inject(Hash.new(0)) do |memo, para|
26
+ memo[para.parent] += 1
27
+ memo
28
+ end.invert
29
+
30
+ candidates[candidates.keys.max]
31
+ else
32
+ @doc
33
+ end
34
+ end
35
+
36
+ def inline_links?
37
+ !@options[:reference_links]
38
+ end
39
+
40
+ def output_reference_links(links)
41
+ references = []
42
+ links.each_with_index do |(href, title), i|
43
+ references << "[#{i + 1}]: #{href}#{title ? ' ' + title.inspect : ''}"
44
+ end
45
+ references.join("\n")
46
+ end
47
+
48
+ private
49
+
50
+ def ignore_element?(elem)
51
+ IGNORE.include?(elem.name) or (@ignored_elements and @ignored_elements.include?(elem))
52
+ end
53
+
54
+ def collect_ignored_elements(scope)
55
+ if @options[:ignores]
56
+ @options[:ignored_elements] = @options[:ignores].map do |expr|
57
+ scope.search(expr).to_a
58
+ end.flatten.uniq
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,25 @@
1
+ Object.class_eval do
2
+ def blank?() false end
3
+ end
4
+
5
+ NilClass.class_eval do
6
+ def blank?() true end
7
+ end
8
+
9
+ String.class_eval do
10
+ def blank?
11
+ self.empty? or !!(self =~ /\A\s+\Z/)
12
+ end
13
+
14
+ def squeeze_whitespace
15
+ self.tr("\n\t", ' ').squeeze(' ')
16
+ end
17
+
18
+ def indent(with = ' ' * 4)
19
+ self.gsub(/^/, with)
20
+ end
21
+ end
22
+
23
+ Hpricot::Text.module_eval do
24
+ def blank?() to_s.blank? end
25
+ end
@@ -0,0 +1,212 @@
1
+ require 'hpricot'
2
+ require 'remark/core_ext'
3
+
4
+ # this applies the default behavior to virtually all Hpricot classes
5
+ Hpricot::Node.module_eval do
6
+ def to_markdown(options = {}) nil end
7
+ def markdown_block?() false end
8
+ end
9
+
10
+ # nothing special to process on Text or CData
11
+ Hpricot::Text.module_eval do
12
+ def to_markdown(options = {}) to_s.squeeze_whitespace end
13
+ end
14
+
15
+ Hpricot::CData.module_eval do
16
+ def to_markdown(options = {}) to_s.squeeze_whitespace end
17
+ end
18
+
19
+ # elements that have children
20
+ Hpricot::Container.module_eval do
21
+ def to_markdown(options = {})
22
+ return '' unless self.children
23
+ previous_was_block = false
24
+ parent_is_block = self.markdown_block?
25
+
26
+ # recurse over this element's children
27
+ content = self.children.inject([]) do |all, child|
28
+ current_is_block = child.markdown_block?
29
+ child_content = child.to_markdown(options)
30
+
31
+ # skip this node if its markdown is nil, empty or, in case
32
+ # that the previous element was a block, all-whitespace
33
+ unless child_content.nil? or child_content.empty? or (previous_was_block and child_content.blank?)
34
+ # handle separating of adjacent markdown blocks with an empty line
35
+ if not all.empty? and current_is_block or previous_was_block
36
+ # strip trailing whitespace if we're opening a new block
37
+ all.last.blank?? all.pop : all.last.rstrip!
38
+ # guard against adding a newline at the beginning
39
+ all << "\n\n" if all.any?
40
+ end
41
+
42
+ unless 'pre' == child.name
43
+ # strip whitespace from the left if ...
44
+ child_content.lstrip! if previous_was_block or # we're adjacent to a block
45
+ (parent_is_block and child == self.children.first) or # this is the first child
46
+ (not all.empty? and all.last =~ / ( \n)?$/) # we're following a space or a forced line break token
47
+
48
+
49
+ # strip whitespace from the right if this is the last node in a block
50
+ child_content.rstrip! if parent_is_block and self.children.last == child
51
+ end
52
+
53
+ all << child_content
54
+ end
55
+
56
+ previous_was_block = current_is_block
57
+ all
58
+ end
59
+
60
+ result = content.join('')
61
+ return result
62
+ end
63
+ end
64
+
65
+ # elements without children
66
+ Hpricot::Leaf.module_eval do
67
+ def to_markdown(options = {})
68
+ inner_text.squeeze_whitespace if elem?
69
+ end
70
+ end
71
+
72
+ Hpricot::Elem.module_eval do
73
+ IGNORE = %w(script head style)
74
+ ALLOWED_EMPTY = %w(img br hr )
75
+ MARKDOWN_BLOCK = %w(p blockquote h1 h2 h3 h4 h5 h6 pre hr)
76
+ MARKDOWN_INLINE = %w(em strong code a img br)
77
+ MARKDOWN_RECOGNIZED = MARKDOWN_BLOCK + MARKDOWN_INLINE + %w(div)
78
+ HTML_BLOCK = MARKDOWN_BLOCK + %w(ul ol dl div noscript form table address fieldset)
79
+
80
+ def to_markdown(options = {})
81
+ return nil if markdown_ignored?(options)
82
+ return '' if markdown_empty?
83
+ return to_s unless markdown_supported_attributes?
84
+
85
+ case name
86
+ when 'div', 'noscript'
87
+ super
88
+ when 'p'
89
+ super
90
+ when /^h([1-6])$/
91
+ ('#' * $1.to_i) + ' ' + super
92
+ when 'ul', 'ol'
93
+ remark_list(options)
94
+ when 'li'
95
+ content = super
96
+ content = content.indent if children.any? { |e| e.markdown_block? }
97
+ content
98
+ when 'pre'
99
+ inner_text.rstrip.indent
100
+ when 'em'
101
+ "_#{super}_"
102
+ when 'strong'
103
+ "**#{super}**"
104
+ when 'code'
105
+ code = inner_text
106
+ code.index('`') ? "`` #{code} ``" : "`#{code}`"
107
+ when 'a'
108
+ remark_link(super, self['href'], self['title'], options)
109
+ when 'img'
110
+ '!' + remark_link(self['alt'], self['src'], self['title'], :reference_links => false)
111
+ when 'blockquote'
112
+ super.indent('> ')
113
+ when 'br'
114
+ " \n" + inner_html
115
+ else
116
+ to_s
117
+ end
118
+ end
119
+
120
+ def remark_list(options = {})
121
+ unordered = self.name == 'ul'
122
+ marker = unordered ? '*' : 0
123
+ nested = false
124
+
125
+ items = self.children_of_type('li').map do |item|
126
+ item = item.to_markdown(options)
127
+ current = unordered ? marker : "#{marker += 1}."
128
+ if item =~ /\A\s/
129
+ nested = true
130
+ item[0, current.length] = current
131
+ item
132
+ else
133
+ current + ' ' + item
134
+ end
135
+ end
136
+
137
+ items.join("\n" * (nested ? 2 : 1))
138
+ end
139
+
140
+ def markdown_block?
141
+ HTML_BLOCK.include?(name)
142
+ end
143
+
144
+ def markdown_recognized?
145
+ MARKDOWN_RECOGNIZED.include?(name)
146
+ end
147
+
148
+ protected
149
+
150
+ def markdown_ignored?(options)
151
+ IGNORE.include?(name) or
152
+ (options[:ignored_elements] and options[:ignored_elements].include?(self))
153
+ end
154
+
155
+ def markdown_empty?
156
+ empty? and markdown_recognized? and not ALLOWED_EMPTY.include?(name)
157
+ end
158
+
159
+ def markdown_supported_attributes?
160
+ case name
161
+ when 'div'
162
+ true
163
+ when 'a'
164
+ attribute_names_match?('href', 'title')
165
+ when 'img'
166
+ attribute_names_match?(%w(alt src), 'title')
167
+ when 'ol', 'ul'
168
+ attributes.empty? and children.all? do |item|
169
+ not item.elem? or (item.name == 'li' and item.attributes.empty?)
170
+ end
171
+ else
172
+ attributes.empty?
173
+ end
174
+ end
175
+
176
+ def attribute_names_match?(only, optional = nil)
177
+ names = attributes.keys.sort
178
+ names -= Array(optional) if optional
179
+ names == Array(only)
180
+ end
181
+
182
+ def remark_link(text, href, title = nil, options = {})
183
+ if options[:reference_links]
184
+ if existing = options[:links].find { |h, t| href == h }
185
+ num = options[:links].index(existing) + 1
186
+ else
187
+ options[:links] << [href, title]
188
+ num = options[:links].length
189
+ end
190
+ "[#{text}][#{num}]"
191
+ else
192
+ title_markup = title ? %( "#{title}") : ''
193
+ "[#{text}](#{href}#{title_markup})"
194
+ end
195
+ end
196
+ end
197
+
198
+ Hpricot::Attributes.class_eval do
199
+ methods = instance_methods.map { |m| m.to_sym }
200
+
201
+ unless methods.include? :empty?
202
+ def empty?
203
+ self.to_hash.empty?
204
+ end
205
+ end
206
+
207
+ unless methods.include? :keys
208
+ def keys
209
+ self.to_hash.keys
210
+ end
211
+ end
212
+ end
@@ -0,0 +1,138 @@
1
+ require 'remark/hpricot_ext'
2
+
3
+ describe Hpricot, "remark extensions" do
4
+ before(:all) do
5
+ @doc = Hpricot(<<-HTML.strip)
6
+ <?xml version="moo" ?>
7
+ <!DOCTYPE html>
8
+ <html>
9
+ <head>
10
+ <title>Sample document</title>
11
+ </head>
12
+ <body>
13
+ <h1>Sample <strong>Remark</strong> document</h1>
14
+ <p>
15
+ A paragraph with <em>nested</em> <strong>content</strong>
16
+ and <i>Remark</i>-supported elements.
17
+ </p>
18
+
19
+ <a name="content"> </a>
20
+ <h2>The content</h2>
21
+ <div id="content">
22
+ <p>First</p>
23
+ <p>Second</p>
24
+ Some content
25
+ <em>in-between</em>
26
+ <p>Third</p>
27
+ </div>
28
+ <p class="foo">I has classname</p>
29
+
30
+ <div id="empty"></div>
31
+ <blockquote>
32
+ Some famous quote
33
+ <blockquote>Nested famous quote</blockquote>
34
+ </blockquote>
35
+ <div class="code">
36
+ <p>Sample code:</p>
37
+ <pre>def preformatted
38
+ text
39
+ end
40
+ </pre>
41
+ </div>
42
+ <img src='moo.jpg' alt='cow'>
43
+ <img src='moo.jpg' alt='cow' width='16'>
44
+
45
+ <code>simple</code> <code>comp ` lex</code> <code>&lt;tag&gt;</code>
46
+
47
+ <div id="br">
48
+ <p>Foo<br>bar</p>
49
+ <p>Foo<br>
50
+ bar <code>baz</code></p>
51
+ <p>Foo</p><br><br><p>Bar</p><br>
52
+ </div>
53
+
54
+ <ul>
55
+ <li>First</li>
56
+ <li>Second</li>
57
+ </ul>
58
+ <ol>
59
+ <li>First</li>
60
+ <li>Second</li>
61
+ </ol>
62
+ </body>
63
+ </html>
64
+ HTML
65
+ end
66
+
67
+ def remark(elem, options = {})
68
+ (String === elem ? @doc.at(elem) : elem).to_markdown(options)
69
+ end
70
+
71
+ it "should return empty string for empty document" do
72
+ remark(Hpricot('')).should == ''
73
+ end
74
+
75
+ it "should ignore DOCTYPE, HEAD and XML processing instructions" do
76
+ remark('head').should be_nil
77
+ remark(@doc.children[0]).should be_nil # doctype
78
+ remark(@doc.children[2]).should be_nil # xmldecl
79
+ end
80
+
81
+ it "should have whitespace nodes respond to blank" do
82
+ @doc.at('a[@name]').children.first.blank?
83
+ end
84
+
85
+ it "should support headings" do
86
+ remark('h1').should == "# Sample **Remark** document"
87
+ remark('h2').should == "## The content"
88
+ end
89
+
90
+ it "should support paragraphs" do
91
+ remark('p').should == "A paragraph with _nested_ **content** and <i>Remark</i>-supported elements."
92
+ end
93
+
94
+ it "should split paragraphs with an empty line" do
95
+ remark('#content').should == "First\n\nSecond\n\nSome content _in-between_\n\nThird"
96
+ end
97
+
98
+ it "should keep full HTML for paragraphs if they have attributes" do
99
+ remark('p.foo').should == '<p class="foo">I has classname</p>'
100
+ end
101
+
102
+ it "should not break on empty DIV" do
103
+ remark('#empty').should == ""
104
+ end
105
+
106
+ it "should support blockquotes" do
107
+ remark('blockquote > blockquote').should == "> Nested famous quote"
108
+ remark('blockquote').should == "> Some famous quote\n> \n> > Nested famous quote"
109
+ end
110
+
111
+ it "should support preformatted text" do
112
+ remark('div.code').should == "Sample code:\n\n def preformatted\n text\n end"
113
+ end
114
+
115
+ it "should support image tags" do
116
+ remark('img[@alt]').should == '![cow](moo.jpg)'
117
+ remark('img[@width]').should == '<img src="moo.jpg" alt="cow" width="16" />'
118
+ end
119
+
120
+ it "should support code spans" do
121
+ remark('code').should == "`simple`"
122
+ remark('code ~ code').should == "`` comp ` lex ``"
123
+ remark('code ~ code ~ code').should == "`<tag>`"
124
+ end
125
+
126
+ it "should support BR" do
127
+ remark('#br').should == "Foo \nbar\n\nFoo \nbar `baz`\n\nFoo\n\nBar"
128
+ end
129
+
130
+ it "should support unordered list" do
131
+ remark('ul').should == "* First\n* Second"
132
+ end
133
+
134
+ it "should support ordered list" do
135
+ remark('ol').should == "1. First\n2. Second"
136
+ end
137
+ end
138
+
@@ -0,0 +1,157 @@
1
+ require 'remark'
2
+
3
+ describe Remark do
4
+ def remark(source, options = {})
5
+ options = {:reference_links => false}.merge(options)
6
+ described_class.new(source, options).to_markdown
7
+ end
8
+
9
+ it "should let through text content" do
10
+ remark("Foo bar").should == 'Foo bar'
11
+ remark("Foo bar\nbaz").should == 'Foo bar baz'
12
+ end
13
+
14
+ it "should preserve elements in remarked blocks" do
15
+ remark("<p>Foo <ins>bar</ins></p>").should == 'Foo <ins>bar</ins>'
16
+ remark("<h2>Foo <ins>bar</ins></h2>").should == '## Foo <ins>bar</ins>'
17
+ end
18
+
19
+ it "should unescape HTML entities" do
20
+ remark("Foo&amp;bar").should == 'Foo&bar'
21
+ remark("<p>If you&#8217;re doing all your development on the &#8220;master&#8221; branch, you&#8217;re not using git").should == "If you’re doing all your development on the “master” branch, you’re not using git"
22
+ end
23
+
24
+ it "should leave unknown elements intact" do
25
+ remark(<<-HTML).should == "Foo\n\n<table>data</table>\n\nBar"
26
+ <p>Foo</p>
27
+ <table>data</table>
28
+ <p>Bar</p>
29
+ HTML
30
+ end
31
+
32
+ describe "whitespace" do
33
+ it "should strip excess whitespace" do
34
+ remark(<<-HTML).should == "Foo bar"
35
+ <p>
36
+ Foo
37
+ bar
38
+ </p>
39
+ HTML
40
+ end
41
+
42
+ it "should strip whitespace in text nodes between processed nodes" do
43
+ remark(<<-HTML).should == "Foo\n\nbar\n\nBaz"
44
+ <p>Foo</p>
45
+
46
+ bar
47
+ <p>Baz</p>
48
+ HTML
49
+ end
50
+ end
51
+
52
+ describe "lists" do
53
+ it "should support lists" do
54
+ remark(<<-HTML).should == "* foo\n* bar"
55
+ <ul>
56
+ <li>foo</li>
57
+ <li>bar</li>
58
+ </ul>
59
+ HTML
60
+
61
+ remark(<<-HTML).should == "1. foo\n2. bar"
62
+ <ol>
63
+ <li>foo</li>
64
+ <li>bar</li>
65
+ </ol>
66
+ HTML
67
+ end
68
+
69
+ it "should support lists with nested content" do
70
+ remark(<<-HTML).should == "* foo\n \n bar\n\n* baz"
71
+ <ul>
72
+ <li><p>foo</p><p>bar</p></li>
73
+ <li><p>baz</p></li>
74
+ </ul>
75
+ HTML
76
+ end
77
+
78
+ it "should output malformed lists as HTML" do
79
+ remark(<<-HTML).should == "<ul>\n <span>bar</span>\n </ul>"
80
+ <ul>
81
+ <span>bar</span>
82
+ </ul>
83
+ HTML
84
+ end
85
+ end
86
+
87
+ it "should support preformatted blocks" do
88
+ remark("<pre>def foo\n bar\nend</pre>").should == " def foo\n bar\n end"
89
+ remark("<pre><code>def foo\n &lt;bar&gt;\nend</code></pre>").should == " def foo\n <bar>\n end"
90
+ remark("<pre>def foo\n</pre>").should == " def foo"
91
+ end
92
+
93
+ describe "inline" do
94
+ it "should remark inline elements" do
95
+ remark("<p>I'm so <strong>strong</strong></p>").should == "I'm so **strong**"
96
+ remark("<p>I'm so <em>emo</em></p>").should == "I'm so _emo_"
97
+ remark("<ul><li><em>Inline</em> stuff in <strong>lists</strong></li></ul>").should == "* _Inline_ stuff in **lists**"
98
+ remark("<h1>Headings <em>too</em></h1>").should == '# Headings _too_'
99
+ end
100
+
101
+ it "should handle nested inline elements" do
102
+ remark("<p>I <strong>love <code>code</code></strong></p>").should == "I **love `code`**"
103
+ remark("<p>I <a href='#'>am <em>fine</em></a></p>").should == "I [am _fine_](#)"
104
+ end
105
+ end
106
+
107
+ describe "hyperlinks" do
108
+ it "should support hyperlinks" do
109
+ remark("<p>Click <a href='http://mislav.uniqpath.com'>here</a></p>").should ==
110
+ "Click [here](http://mislav.uniqpath.com)"
111
+ remark("<a href='/foo' title='bar'>baz</a>").should == '[baz](/foo "bar")'
112
+ end
113
+
114
+ it "should have reference-style hyperlinks" do
115
+ remark("<p>Click <a href='foo' title='mooslav'>here</a> and <a href='bar'>there</a></p>", :reference_links => true).should ==
116
+ "Click [here][1] and [there][2]\n\n\n[1]: foo \"mooslav\"\n[2]: bar"
117
+ remark("<p>Click <a href='foo'>here</a> and <a href='foo'>there</a></p>", :reference_links => true).should ==
118
+ "Click [here][1] and [there][1]\n\n\n[1]: foo"
119
+ remark("", :reference_links => true).should == ""
120
+ end
121
+ end
122
+
123
+ it "should support ignores" do
124
+ remark("<p>Foo <span>bar</span> baz</p>", :ignores => ['span']).should == "Foo baz"
125
+ end
126
+
127
+ describe "scoping" do
128
+ before do
129
+ @html = <<-HTML
130
+ <html>
131
+ <body>
132
+ <div id="div1">
133
+ <p>Only 1 paragraph</p>
134
+ </div>
135
+ <div id="div3">
136
+ <p>Wow, 3 paragraphs</p>
137
+ <p>This must be where the content is</p>
138
+ <p>I'm sure</p>
139
+ </div>
140
+ <div id="div2">
141
+ <p>Only 2 paragraphs</p>
142
+ <p>How disappointing</p>
143
+ </div>
144
+ </body>
145
+ </html>
146
+ HTML
147
+ end
148
+
149
+ it "should scope to the most likely element that holds content" do
150
+ remark(@html).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
151
+ end
152
+
153
+ it "should scope to the explicit scope" do
154
+ remark(@html, :scope => '#div2').should == "Only 2 paragraphs\n\nHow disappointing"
155
+ end
156
+ end
157
+ end
@@ -0,0 +1,53 @@
1
+ <h1>Remark &mdash; HTML to Markdown converter</h1>
2
+
3
+ <p>This is a sample document which will get updated as Remark understands more HTML.
4
+ It reflects what's currently supported.</p>
5
+
6
+ <p class="nice">Known block elements are left intact if they have attributes.
7
+ Markdown doesn't have a syntax for them.</p>
8
+
9
+ <table>
10
+ <tr>
11
+ <td>Elements that can't be represented in Markdown are left intact.</td>
12
+ </tr>
13
+ </table>
14
+
15
+ <p>SCRIPT and HEAD tags are swallowed, as browsers don't render them as content.</p>
16
+
17
+ <script type="text/javascript">
18
+ alert("I will not survive")
19
+ </script>
20
+
21
+ <p>Remark supports Markdown syntax for <em>inline</em> markup.
22
+ <a href="http://github.com/mislav">Hyperlinks</a> and <code>code spans</code> are a must.</p>
23
+
24
+ <ul>
25
+ <li>List items too;</li>
26
+ <li>ordered or unordered.</li>
27
+ </ul>
28
+
29
+ <ol>
30
+ <li><p>Paragraphs in list items</p></li>
31
+ <li><p>Make them have one blank line between them in Markdown</p></li>
32
+ <li>
33
+ <p>Some list items even have multiple paragraphs</p>
34
+ <p>That shouldn't be too hard to do … right?</p>
35
+ <pre>code blocks too</pre>
36
+ </li>
37
+ </ol>
38
+
39
+ <p>Remark supports BR elements in paragraphs,<br>
40
+ although people tend to abuse them.</p>
41
+
42
+ <pre><code>And who would forget
43
+ Preformatted code blocks :)</code></pre>
44
+
45
+ <p>Notice how it handles <img src="moo.jpg" alt="images" title="Awesum img"> in a nice way.</p>
46
+
47
+ <blockquote>
48
+ <p>I think</p>
49
+ <p>therefore I am</p>
50
+ <blockquote>
51
+ <p>Nested blockquotes</p>
52
+ </blockquote>
53
+ </blockquote>
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: remark
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.1
5
+ platform: ruby
6
+ authors:
7
+ - "Mislav Marohni\xC4\x87"
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-25 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ~>
22
+ - !ruby/object:Gem::Version
23
+ version: 0.8.2
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 1.2.9
34
+ version:
35
+ description: Remark turns simple HTML documents or content in web pages into Markdown source.
36
+ email: mislav.marohnic@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files: []
42
+
43
+ files:
44
+ - Rakefile
45
+ - bin/remark
46
+ - lib/remark/core_ext.rb
47
+ - lib/remark/hpricot_ext.rb
48
+ - lib/remark.rb
49
+ - spec/hpricot_ext_spec.rb
50
+ - spec/remark_spec.rb
51
+ - spec/sample.html
52
+ - README.markdown
53
+ - LICENSE
54
+ has_rdoc: false
55
+ homepage: http://github.com/mislav/remark
56
+ licenses: []
57
+
58
+ post_install_message:
59
+ rdoc_options: []
60
+
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ version:
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: "0"
74
+ version:
75
+ requirements: []
76
+
77
+ rubyforge_project:
78
+ rubygems_version: 1.3.5
79
+ signing_key:
80
+ specification_version: 3
81
+ summary: HTML to Markdown converter
82
+ test_files: []
83
+