remark 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,18 @@
1
+ Copyright (c) 2009 Mislav Marohnić
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7
+ the Software, and to permit persons to whom the Software is furnished to do so,
8
+ subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ Remark — HTML→Markdown tool
2
+ ===========================
3
+
4
+ <i>Remark</i> parses HTML and delivers proper Markdown.
5
+
6
+ Usage
7
+ -----
8
+
9
+ From command-line:
10
+
11
+ remark path/to/file.html
12
+
13
+ or by STDIN:
14
+
15
+ echo "..." | remark
16
+
17
+ You can try feeding it a document from the web:
18
+
19
+ curl -s daringfireball.net/projects/markdown/basics | remark > result.markdown
20
+
21
+ See how it does.
22
+
23
+ If you've cloned the repository, invoke the binary like this:
24
+
25
+ ruby -Ilib -rubygems bin/remark spec/sample.html
26
+
27
+ And this is how you use it from Ruby code:
28
+
29
+ Remark.new('<h1>My document</h1><p>Some content</p>').to_markdown
@@ -0,0 +1,31 @@
1
# Convenience task: runs the command-line converter from the working tree
# against the bundled sample document and prints the resulting Markdown.
desc "renders the spec/sample.html to Markdown"
task(:sample) do
  command = "ruby -Ilib -rubygems bin/remark spec/sample.html"
  system command
end
5
+
6
# Builds the gem specification in memory, checks that its Ruby serialization
# can be evaluated at a raised $SAFE level, and only then writes it to
# "<name>.gemspec" in the current directory.
desc "generates .gemspec file"
task :gemspec do
  spec = Gem::Specification.new do |gem|
    gem.name = "remark"
    # Must match the version being released.
    gem.version = '0.3.1'

    gem.summary = "HTML to Markdown converter"
    gem.description = "Remark turns simple HTML documents or content in web pages to Markdown source."
    gem.email = "mislav.marohnic@gmail.com"
    gem.homepage = "http://github.com/mislav/remark"
    gem.authors = ["Mislav Marohnić"]
    gem.has_rdoc = false

    # lib/remark/hpricot_ext.rb requires hpricot at load time, so it has to be
    # declared as a runtime dependency; rspec is only needed to run the specs.
    gem.add_dependency 'hpricot', '~> 0.8.2'
    gem.add_development_dependency 'rspec', '~> 1.2.9'

    # Only ship files that are both matched by the globs and tracked by git.
    gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files`.split("\n")
    gem.executables = Dir['bin/*'].map { |f| File.basename(f) }
  end

  spec_string = spec.to_ruby

  begin
    # Evaluate the generated source in a separate thread with $SAFE raised so
    # that a gemspec doing anything unsafe is rejected before it is written.
    Thread.new { eval("$SAFE = 3\n#{spec_string}", binding) }.join
  rescue => error
    abort "unsafe gemspec: #{error}"
  else
    File.open("#{spec.name}.gemspec", 'w') { |file| file.write spec_string }
  end
end
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env ruby
2
+ require 'optparse'
3
+ require 'remark'
4
+
5
# Command-line front end: parses switches into an options hash, reads the HTML
# from the files named on the command line (or STDIN when none are given, via
# ARGF) and prints the Markdown rendering.
options = {}
OptionParser.new do |opts|
  opts.banner = "Usage: remark [options] [FILE]"

  # Reference-style links are the library default; this switch turns them off.
  opts.on("-n", "--inline-links", "Render link URLs inline (instead of reference-style)") do |inline|
    options[:reference_links] = !inline
  end

  opts.on("-s", "--scope EXPR", "Scope to a specific CSS/XPath expression in the HTML document") do |scope|
    options[:scope] = scope
  end

  # May be given several times; every expression is collected.
  opts.on("-i", "--ignore EXPR", "Ignore elements that match CSS/XPath expression") do |expr|
    (options[:ignores] ||= []) << expr
  end
end.parse!

puts Remark.new(ARGF.read, options).to_markdown
@@ -0,0 +1,61 @@
1
+ require 'remark/hpricot_ext'
2
+
3
# Converts an HTML document to Markdown source.
#
#   Remark.new('<h1>My document</h1><p>Some content</p>').to_markdown
#
# Recognized options:
# :reference_links:: when true (the default) hyperlinks are rendered
#                    reference-style and the references are appended to the
#                    output; when false links are rendered inline
# :scope::           CSS/XPath expression selecting the element to convert
# :ignores::         array of CSS/XPath expressions; matching elements inside
#                    the scope are left out of the output
class Remark
  DEFAULT_OPTIONS = { :reference_links => true }.freeze

  # source  - String of HTML
  # options - Hash of options (see the class documentation); it is merged into
  #           a copy of DEFAULT_OPTIONS, the caller's hash is not modified
  def initialize(source, options = {})
    @doc = Hpricot(source)
    @options = DEFAULT_OPTIONS.merge options
    @ignored_elements = nil
  end

  # Renders the scoped part of the document as a Markdown String. In
  # reference-link mode the collected link references follow the content,
  # separated from it by two blank lines.
  def to_markdown
    parent = scope
    collect_ignored_elements(parent)
    # start from a fresh list on every call so repeated calls don't accumulate
    @options[:links] = [] unless inline_links?

    # the scoped element itself may be ignored and render to nil
    result = parent.to_markdown(@options).to_s
    return result if inline_links?

    links = @options[:links]
    return result if links.empty?
    result + "\n\n\n" + output_reference_links(links)
  end

  # Returns the node that conversion starts from:
  #
  # * the element matching the :scope option, when given;
  # * otherwise, if the document has a <body>, the element that is the parent
  #   of the largest number of <p> elements (the body itself when it contains
  #   no paragraphs);
  # * otherwise the whole document.
  #
  # Raises ArgumentError if :scope is given but matches nothing.
  def scope
    if selector = @options[:scope]
      @doc.at(selector) or
        raise ArgumentError, "scope #{selector.inspect} did not match any element"
    elsif body = @doc.at('/html/body')
      paragraph_counts = Hash.new(0)
      (body / 'p').each { |para| paragraph_counts[para.parent] += 1 }

      # [parent, count] pair with the highest count, nil if there are no paragraphs
      winner = paragraph_counts.max { |a, b| a.last <=> b.last }
      winner ? winner.first : body
    else
      @doc
    end
  end

  # True when links are rendered inline rather than reference-style.
  def inline_links?
    !@options[:reference_links]
  end

  # Formats the reference definitions for +links+, an array of [href, title]
  # pairs, as one "[n]: href "title"" line per link, numbered from 1. The
  # quoted title is left out when it is nil.
  def output_reference_links(links)
    references = []
    links.each_with_index do |(href, title), i|
      references << "[#{i + 1}]: #{href}#{title ? ' ' + title.inspect : ''}"
    end
    references.join("\n")
  end

  private

  # True if +elem+ is never rendered (its tag name is listed in IGNORE) or
  # matched one of the :ignores expressions during the last conversion.
  def ignore_element?(elem)
    IGNORE.include?(elem.name) or (@ignored_elements and @ignored_elements.include?(elem))
  end

  # Resolves the :ignores expressions against +scope+ and stores the matching
  # elements both on the instance and in the options hash handed to the nodes.
  def collect_ignored_elements(scope)
    if @options[:ignores]
      @ignored_elements = @options[:ignored_elements] = @options[:ignores].map do |expr|
        scope.search(expr).to_a
      end.flatten.uniq
    end
  end
end
@@ -0,0 +1,25 @@
1
# Baseline for the blank? protocol: an arbitrary object is never blank.
# More specific classes override this.
class Object
  def blank?
    false
  end
end
4
+
5
# nil always counts as blank.
class NilClass
  def blank?
    true
  end
end
8
+
9
# String helpers used while assembling Markdown output.
String.class_eval do
  # True when the string is empty or consists of whitespace only.
  def blank?
    self !~ /\S/
  end

  # Returns a copy in which every whitespace character (newline, tab, carriage
  # return, form feed, vertical tab) is turned into a space and runs of spaces
  # are collapsed to a single one. Carriage returns are included so that
  # documents with CRLF line endings don't leak "\r" into the output, and so
  # that this method agrees with blank? about what whitespace is.
  def squeeze_whitespace
    self.tr("\n\t\r\f\v", ' ').squeeze(' ')
  end

  # Returns a copy with +with+ (four spaces by default) inserted at the start
  # of every line, empty lines included.
  def indent(with = ' ' * 4)
    self.gsub(/^/, with)
  end
end
22
+
23
# A text node is blank when its string form is blank.
Hpricot::Text.module_eval do
  def blank?
    to_s.blank?
  end
end
@@ -0,0 +1,212 @@
1
+ require 'hpricot'
2
+ require 'remark/core_ext'
3
+
4
# Fallback Markdown behavior shared by virtually all Hpricot classes: a node
# produces no Markdown (nil) and is not a block, unless a more specific
# class overrides these methods.
Hpricot::Node.module_eval do
  def to_markdown(options = {})
    nil
  end

  def markdown_block?
    false
  end
end
9
+
10
# Text needs no conversion: its Markdown is simply its string form with
# whitespace normalized.
Hpricot::Text.module_eval do
  def to_markdown(options = {})
    to_s.squeeze_whitespace
  end
end
14
+
15
# CDATA sections are rendered like plain text: string form with whitespace
# normalized.
Hpricot::CData.module_eval do
  def to_markdown(options = {})
    to_s.squeeze_whitespace
  end
end
18
+
19
# Nodes that hold children.
Hpricot::Container.module_eval do
  # Renders all children and glues the pieces together, putting an empty line
  # between adjacent blocks and trimming whitespace around block boundaries.
  # Returns '' when there are no children.
  def to_markdown(options = {})
    nodes = self.children
    return '' unless nodes

    inside_block = self.markdown_block?
    after_block = false
    pieces = []

    nodes.each do |node|
      node_is_block = node.markdown_block?
      text = node.to_markdown(options)

      # A child contributes nothing when its markdown is nil or empty, or when
      # it is all-whitespace and directly follows a block.
      skipped = text.nil? || text.empty? || (after_block && text.blank?)

      unless skipped
        # Adjacent markdown blocks are separated by an empty line.
        if (!pieces.empty? && node_is_block) || after_block
          # Drop trailing whitespace before opening the new block ...
          if pieces.last.blank?
            pieces.pop
          else
            pieces.last.rstrip!
          end
          # ... but never start the output with the separator.
          pieces << "\n\n" if pieces.any?
        end

        # Preformatted content keeps its whitespace untouched.
        unless 'pre' == node.name
          strip_left = after_block ||                                 # adjacent to a block
            (inside_block && node == nodes.first) ||                  # first child of a block
            (!pieces.empty? && pieces.last =~ / ( \n)?$/)             # after a space or a forced line break token
          text.lstrip! if strip_left

          # The last node inside a block loses its trailing whitespace.
          text.rstrip! if inside_block && nodes.last == node
        end

        pieces << text
      end

      # Recorded even for skipped children.
      after_block = node_is_block
    end

    pieces.join('')
  end
end
64
+
65
# Nodes that have no children.
Hpricot::Leaf.module_eval do
  # Leaves that are elements render as their whitespace-normalized inner text;
  # any other leaf renders as nil.
  def to_markdown(options = {})
    return nil unless elem?
    inner_text.squeeze_whitespace
  end
end
71
+
72
# Markdown rendering for HTML elements.
#
# NOTE(review): constants assigned inside this block are resolved lexically,
# so they appear to become top-level constants rather than constants of
# Hpricot::Elem; Remark#ignore_element? refers to IGNORE without a namespace.
Hpricot::Elem.module_eval do
  # elements that are dropped from the output entirely
  IGNORE = %w(script head style)
  # recognized elements that are meaningful even without content
  ALLOWED_EMPTY = %w(img br hr )
  MARKDOWN_BLOCK = %w(p blockquote h1 h2 h3 h4 h5 h6 pre hr)
  MARKDOWN_INLINE = %w(em strong code a img br)
  MARKDOWN_RECOGNIZED = MARKDOWN_BLOCK + MARKDOWN_INLINE + %w(div)
  HTML_BLOCK = MARKDOWN_BLOCK + %w(ul ol dl div noscript form table address fieldset)

  # Renders this element as Markdown.
  #
  # Returns nil for ignored elements, '' for recognized elements without
  # content, and the element's own HTML (to_s) when it carries attributes
  # Markdown cannot express or when there is no Markdown syntax for it.
  #
  # options - Hash; the keys read here and in the helpers are
  #           :ignored_elements, :reference_links and :links
  def to_markdown(options = {})
    return nil if markdown_ignored?(options)
    return '' if markdown_empty?
    return to_s unless markdown_supported_attributes?

    case name
    when 'div', 'noscript', 'p'
      super
    when /^h([1-6])$/
      ('#' * $1.to_i) + ' ' + super
    when 'ul', 'ol'
      remark_list(options)
    when 'li'
      content = super
      # items holding block content are indented; remark_list later overwrites
      # the start of the indentation with the list marker
      content = content.indent if (children || []).any? { |e| e.markdown_block? }
      content
    when 'pre'
      inner_text.rstrip.indent
    when 'em'
      "_#{super}_"
    when 'strong'
      "**#{super}**"
    when 'code'
      code = inner_text
      # code containing a backtick needs the double-backtick delimiter
      code.index('`') ? "`` #{code} ``" : "`#{code}`"
    when 'a'
      remark_link(super, self['href'], self['title'], options)
    when 'img'
      # images are always rendered inline, never reference-style
      '!' + remark_link(self['alt'], self['src'], self['title'], :reference_links => false)
    when 'blockquote'
      super.indent('> ')
    when 'br'
      # NOTE(review): Markdown's forced line break is two trailing spaces
      # before the newline -- confirm this literal against the original file.
      " \n" + inner_html
    else
      to_s
    end
  end

  # Renders the <li> children of a <ul>/<ol> as Markdown list items: "*" for
  # unordered lists, "1.", "2.", ... for ordered ones. Items that render to
  # nil (ignored elements) are skipped and do not consume a number. If any
  # item contains block content, items are separated by an empty line.
  def remark_list(options = {})
    unordered = self.name == 'ul'
    number = 0
    nested = false
    items = []

    self.children_of_type('li').each do |li|
      item = li.to_markdown(options)
      next if item.nil?

      marker = unordered ? '*' : "#{number += 1}."
      if item =~ /\A\s/
        # indented (block) content: write the marker over the leading indent
        nested = true
        item[0, marker.length] = marker
      else
        item = marker + ' ' + item
      end
      items << item
    end

    items.join("\n" * (nested ? 2 : 1))
  end

  # True if this element is block-level HTML.
  def markdown_block?
    HTML_BLOCK.include?(name)
  end

  # True if this element is listed in MARKDOWN_RECOGNIZED.
  def markdown_recognized?
    MARKDOWN_RECOGNIZED.include?(name)
  end

  protected

  # True if the element must be left out: its tag name is in IGNORE or it is
  # one of options[:ignored_elements].
  def markdown_ignored?(options)
    IGNORE.include?(name) or
      (options[:ignored_elements] and options[:ignored_elements].include?(self))
  end

  # True for recognized elements without content, except those listed in
  # ALLOWED_EMPTY.
  def markdown_empty?
    empty? and markdown_recognized? and not ALLOWED_EMPTY.include?(name)
  end

  # True if the element's attributes can be represented in Markdown. When this
  # is false the element is emitted as raw HTML.
  def markdown_supported_attributes?
    case name
    when 'div'
      true
    when 'a'
      attribute_names_match?('href', 'title')
    when 'img'
      attribute_names_match?(%w(alt src), 'title')
    when 'ol', 'ul'
      # lists must be attribute-free and may only contain attribute-free <li>
      # elements (non-element children such as text are fine)
      attributes.empty? and (children || []).all? { |item|
        not item.elem? or (item.name == 'li' and item.attributes.empty?)
      }
    else
      attributes.empty?
    end
  end

  # True if the attribute names, once the +optional+ ones are removed, are
  # exactly +only+. +only+ is compared against the sorted names, so pass it
  # in sorted order.
  def attribute_names_match?(only, optional = nil)
    names = attributes.keys.sort
    names -= Array(optional) if optional
    names == Array(only)
  end

  # Renders a Markdown link.
  #
  # With options[:reference_links] the result is "[text][n]" and the
  # [href, title] pair is recorded in options[:links] (created on demand);
  # a href that was already recorded reuses its number. Otherwise the result
  # is the inline form "[text](href "title")".
  def remark_link(text, href, title = nil, options = {})
    if options[:reference_links]
      links = (options[:links] ||= [])
      existing = links.find { |h, t| href == h }
      if existing
        num = links.index(existing) + 1
      else
        links << [href, title]
        num = links.length
      end
      "[#{text}][#{num}]"
    else
      title_markup = title ? %( "#{title}") : ''
      "[#{text}](#{href}#{title_markup})"
    end
  end
end
197
+
198
# Compatibility shims: give Hpricot::Attributes the Hash-like #empty? and
# #keys readers, but only when the class doesn't already provide them.
Hpricot::Attributes.class_eval do
  unless method_defined?(:empty?)
    def empty?
      to_hash.empty?
    end
  end

  unless method_defined?(:keys)
    def keys
      to_hash.keys
    end
  end
end
@@ -0,0 +1,138 @@
1
+ require 'remark/hpricot_ext'
2
+
3
+ describe Hpricot, "remark extensions" do
4
+ before(:all) do
5
+ @doc = Hpricot(<<-HTML.strip)
6
+ <?xml version="moo" ?>
7
+ <!DOCTYPE html>
8
+ <html>
9
+ <head>
10
+ <title>Sample document</title>
11
+ </head>
12
+ <body>
13
+ <h1>Sample <strong>Remark</strong> document</h1>
14
+ <p>
15
+ A paragraph with <em>nested</em> <strong>content</strong>
16
+ and <i>Remark</i>-supported elements.
17
+ </p>
18
+
19
+ <a name="content"> </a>
20
+ <h2>The content</h2>
21
+ <div id="content">
22
+ <p>First</p>
23
+ <p>Second</p>
24
+ Some content
25
+ <em>in-between</em>
26
+ <p>Third</p>
27
+ </div>
28
+ <p class="foo">I has classname</p>
29
+
30
+ <div id="empty"></div>
31
+ <blockquote>
32
+ Some famous quote
33
+ <blockquote>Nested famous quote</blockquote>
34
+ </blockquote>
35
+ <div class="code">
36
+ <p>Sample code:</p>
37
+ <pre>def preformatted
38
+ text
39
+ end
40
+ </pre>
41
+ </div>
42
+ <img src='moo.jpg' alt='cow'>
43
+ <img src='moo.jpg' alt='cow' width='16'>
44
+
45
+ <code>simple</code> <code>comp ` lex</code> <code>&lt;tag&gt;</code>
46
+
47
+ <div id="br">
48
+ <p>Foo<br>bar</p>
49
+ <p>Foo<br>
50
+ bar <code>baz</code></p>
51
+ <p>Foo</p><br><br><p>Bar</p><br>
52
+ </div>
53
+
54
+ <ul>
55
+ <li>First</li>
56
+ <li>Second</li>
57
+ </ul>
58
+ <ol>
59
+ <li>First</li>
60
+ <li>Second</li>
61
+ </ol>
62
+ </body>
63
+ </html>
64
+ HTML
65
+ end
66
+
67
+ def remark(elem, options = {})
68
+ (String === elem ? @doc.at(elem) : elem).to_markdown(options)
69
+ end
70
+
71
+ it "should return empty string for empty document" do
72
+ remark(Hpricot('')).should == ''
73
+ end
74
+
75
+ it "should ignore DOCTYPE, HEAD and XML processing instructions" do
76
+ remark('head').should be_nil
77
+ remark(@doc.children[0]).should be_nil # doctype
78
+ remark(@doc.children[2]).should be_nil # xmldecl
79
+ end
80
+
81
# The lone child of <a name="content"> </a> is a whitespace-only text node.
# The expectation is required: without it the example passes no matter what
# blank? returns.
it "should have whitespace nodes respond to blank" do
  @doc.at('a[@name]').children.first.should be_blank
end
84
+
85
+ it "should support headings" do
86
+ remark('h1').should == "# Sample **Remark** document"
87
+ remark('h2').should == "## The content"
88
+ end
89
+
90
+ it "should support paragraphs" do
91
+ remark('p').should == "A paragraph with _nested_ **content** and <i>Remark</i>-supported elements."
92
+ end
93
+
94
+ it "should split paragraphs with an empty line" do
95
+ remark('#content').should == "First\n\nSecond\n\nSome content _in-between_\n\nThird"
96
+ end
97
+
98
+ it "should keep full HTML for paragraphs if they have attributes" do
99
+ remark('p.foo').should == '<p class="foo">I has classname</p>'
100
+ end
101
+
102
+ it "should not break on empty DIV" do
103
+ remark('#empty').should == ""
104
+ end
105
+
106
+ it "should support blockquotes" do
107
+ remark('blockquote > blockquote').should == "> Nested famous quote"
108
+ remark('blockquote').should == "> Some famous quote\n> \n> > Nested famous quote"
109
+ end
110
+
111
+ it "should support preformatted text" do
112
+ remark('div.code').should == "Sample code:\n\n def preformatted\n text\n end"
113
+ end
114
+
115
+ it "should support image tags" do
116
+ remark('img[@alt]').should == '![cow](moo.jpg)'
117
+ remark('img[@width]').should == '<img src="moo.jpg" alt="cow" width="16" />'
118
+ end
119
+
120
+ it "should support code spans" do
121
+ remark('code').should == "`simple`"
122
+ remark('code ~ code').should == "`` comp ` lex ``"
123
+ remark('code ~ code ~ code').should == "`<tag>`"
124
+ end
125
+
126
+ it "should support BR" do
127
+ remark('#br').should == "Foo \nbar\n\nFoo \nbar `baz`\n\nFoo\n\nBar"
128
+ end
129
+
130
+ it "should support unordered list" do
131
+ remark('ul').should == "* First\n* Second"
132
+ end
133
+
134
+ it "should support ordered list" do
135
+ remark('ol').should == "1. First\n2. Second"
136
+ end
137
+ end
138
+
@@ -0,0 +1,157 @@
1
+ require 'remark'
2
+
3
+ describe Remark do
4
+ def remark(source, options = {})
5
+ options = {:reference_links => false}.merge(options)
6
+ described_class.new(source, options).to_markdown
7
+ end
8
+
9
+ it "should let through text content" do
10
+ remark("Foo bar").should == 'Foo bar'
11
+ remark("Foo bar\nbaz").should == 'Foo bar baz'
12
+ end
13
+
14
+ it "should preserve elements in remarked blocks" do
15
+ remark("<p>Foo <ins>bar</ins></p>").should == 'Foo <ins>bar</ins>'
16
+ remark("<h2>Foo <ins>bar</ins></h2>").should == '## Foo <ins>bar</ins>'
17
+ end
18
+
19
+ it "should unescape HTML entities" do
20
+ remark("Foo&amp;bar").should == 'Foo&bar'
21
+ remark("<p>If you&#8217;re doing all your development on the &#8220;master&#8221; branch, you&#8217;re not using git").should == "If you’re doing all your development on the “master” branch, you’re not using git"
22
+ end
23
+
24
+ it "should leave unknown elements intact" do
25
+ remark(<<-HTML).should == "Foo\n\n<table>data</table>\n\nBar"
26
+ <p>Foo</p>
27
+ <table>data</table>
28
+ <p>Bar</p>
29
+ HTML
30
+ end
31
+
32
+ describe "whitespace" do
33
+ it "should strip excess whitespace" do
34
+ remark(<<-HTML).should == "Foo bar"
35
+ <p>
36
+ Foo
37
+ bar
38
+ </p>
39
+ HTML
40
+ end
41
+
42
+ it "should strip whitespace in text nodes between processed nodes" do
43
+ remark(<<-HTML).should == "Foo\n\nbar\n\nBaz"
44
+ <p>Foo</p>
45
+
46
+ bar
47
+ <p>Baz</p>
48
+ HTML
49
+ end
50
+ end
51
+
52
+ describe "lists" do
53
+ it "should support lists" do
54
+ remark(<<-HTML).should == "* foo\n* bar"
55
+ <ul>
56
+ <li>foo</li>
57
+ <li>bar</li>
58
+ </ul>
59
+ HTML
60
+
61
+ remark(<<-HTML).should == "1. foo\n2. bar"
62
+ <ol>
63
+ <li>foo</li>
64
+ <li>bar</li>
65
+ </ol>
66
+ HTML
67
+ end
68
+
69
+ it "should support lists with nested content" do
70
+ remark(<<-HTML).should == "* foo\n \n bar\n\n* baz"
71
+ <ul>
72
+ <li><p>foo</p><p>bar</p></li>
73
+ <li><p>baz</p></li>
74
+ </ul>
75
+ HTML
76
+ end
77
+
78
+ it "should output malformed lists as HTML" do
79
+ remark(<<-HTML).should == "<ul>\n <span>bar</span>\n </ul>"
80
+ <ul>
81
+ <span>bar</span>
82
+ </ul>
83
+ HTML
84
+ end
85
+ end
86
+
87
+ it "should support preformatted blocks" do
88
+ remark("<pre>def foo\n bar\nend</pre>").should == " def foo\n bar\n end"
89
+ remark("<pre><code>def foo\n &lt;bar&gt;\nend</code></pre>").should == " def foo\n <bar>\n end"
90
+ remark("<pre>def foo\n</pre>").should == " def foo"
91
+ end
92
+
93
+ describe "inline" do
94
+ it "should remark inline elements" do
95
+ remark("<p>I'm so <strong>strong</strong></p>").should == "I'm so **strong**"
96
+ remark("<p>I'm so <em>emo</em></p>").should == "I'm so _emo_"
97
+ remark("<ul><li><em>Inline</em> stuff in <strong>lists</strong></li></ul>").should == "* _Inline_ stuff in **lists**"
98
+ remark("<h1>Headings <em>too</em></h1>").should == '# Headings _too_'
99
+ end
100
+
101
+ it "should handle nested inline elements" do
102
+ remark("<p>I <strong>love <code>code</code></strong></p>").should == "I **love `code`**"
103
+ remark("<p>I <a href='#'>am <em>fine</em></a></p>").should == "I [am _fine_](#)"
104
+ end
105
+ end
106
+
107
+ describe "hyperlinks" do
108
+ it "should support hyperlinks" do
109
+ remark("<p>Click <a href='http://mislav.uniqpath.com'>here</a></p>").should ==
110
+ "Click [here](http://mislav.uniqpath.com)"
111
+ remark("<a href='/foo' title='bar'>baz</a>").should == '[baz](/foo "bar")'
112
+ end
113
+
114
+ it "should have reference-style hyperlinks" do
115
+ remark("<p>Click <a href='foo' title='mooslav'>here</a> and <a href='bar'>there</a></p>", :reference_links => true).should ==
116
+ "Click [here][1] and [there][2]\n\n\n[1]: foo \"mooslav\"\n[2]: bar"
117
+ remark("<p>Click <a href='foo'>here</a> and <a href='foo'>there</a></p>", :reference_links => true).should ==
118
+ "Click [here][1] and [there][1]\n\n\n[1]: foo"
119
+ remark("", :reference_links => true).should == ""
120
+ end
121
+ end
122
+
123
+ it "should support ignores" do
124
+ remark("<p>Foo <span>bar</span> baz</p>", :ignores => ['span']).should == "Foo baz"
125
+ end
126
+
127
+ describe "scoping" do
128
+ before do
129
+ @html = <<-HTML
130
+ <html>
131
+ <body>
132
+ <div id="div1">
133
+ <p>Only 1 paragraph</p>
134
+ </div>
135
+ <div id="div3">
136
+ <p>Wow, 3 paragraphs</p>
137
+ <p>This must be where the content is</p>
138
+ <p>I'm sure</p>
139
+ </div>
140
+ <div id="div2">
141
+ <p>Only 2 paragraphs</p>
142
+ <p>How disappointing</p>
143
+ </div>
144
+ </body>
145
+ </html>
146
+ HTML
147
+ end
148
+
149
+ it "should scope to the most likely element that holds content" do
150
+ remark(@html).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
151
+ end
152
+
153
+ it "should scope to the explicit scope" do
154
+ remark(@html, :scope => '#div2').should == "Only 2 paragraphs\n\nHow disappointing"
155
+ end
156
+ end
157
+ end
@@ -0,0 +1,53 @@
1
+ <h1>Remark &mdash; HTML to Markdown converter</h1>
2
+
3
+ <p>This is a sample document which will get updated as Remark understands more HTML.
4
+ It reflects what's currently supported.</p>
5
+
6
+ <p class="nice">Known block elements are left intact if they have attributes.
7
+ Markdown doesn't have a syntax for them.</p>
8
+
9
+ <table>
10
+ <tr>
11
+ <td>Elements that can't be represented in Markdown are left intact.</td>
12
+ </tr>
13
+ </table>
14
+
15
+ <p>SCRIPT and HEAD tags are swallowed, as browsers don't render them as content.</p>
16
+
17
+ <script type="text/javascript">
18
+ alert("I will not survive")
19
+ </script>
20
+
21
+ <p>Remark supports Markdown syntax for <em>inline</em> markup.
22
+ <a href="http://github.com/mislav">Hyperlinks</a> and <code>code spans</code> are a must.</p>
23
+
24
+ <ul>
25
+ <li>List items too;</li>
26
+ <li>ordered or unordered.</li>
27
+ </ul>
28
+
29
+ <ol>
30
+ <li><p>Paragraphs in list items</p></li>
31
+ <li><p>Make them have one blank line between them in Markdown</p></li>
32
+ <li>
33
+ <p>Some list items even have multiple paragraphs</p>
34
+ <p>That shouldn't be too hard to do … right?</p>
35
+ <pre>code blocks too</pre>
36
+ </li>
37
+ </ol>
38
+
39
+ <p>Remark supports BR elements in paragraphs,<br>
40
+ although people tend to abuse them.</p>
41
+
42
+ <pre><code>And who would forget
43
+ Preformatted code blocks :)</code></pre>
44
+
45
+ <p>Notice how it handles <img src="moo.jpg" alt="images" title="Awesum img"> in a nice way.</p>
46
+
47
+ <blockquote>
48
+ <p>I think</p>
49
+ <p>therefore I am</p>
50
+ <blockquote>
51
+ <p>Nested blockquotes</p>
52
+ </blockquote>
53
+ </blockquote>
metadata ADDED
@@ -0,0 +1,83 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: remark
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.1
5
+ platform: ruby
6
+ authors:
7
+ - "Mislav Marohni\xC4\x87"
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-25 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ~>
22
+ - !ruby/object:Gem::Version
23
+ version: 0.8.2
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ~>
32
+ - !ruby/object:Gem::Version
33
+ version: 1.2.9
34
+ version:
35
+ description: Remark turns simple HTML documents or content in web pages to Markdown source.
36
+ email: mislav.marohnic@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files: []
42
+
43
+ files:
44
+ - Rakefile
45
+ - bin/remark
46
+ - lib/remark/core_ext.rb
47
+ - lib/remark/hpricot_ext.rb
48
+ - lib/remark.rb
49
+ - spec/hpricot_ext_spec.rb
50
+ - spec/remark_spec.rb
51
+ - spec/sample.html
52
+ - README.markdown
53
+ - LICENSE
54
+ has_rdoc: false
55
+ homepage: http://github.com/mislav/remark
56
+ licenses: []
57
+
58
+ post_install_message:
59
+ rdoc_options: []
60
+
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ version:
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: "0"
74
+ version:
75
+ requirements: []
76
+
77
+ rubyforge_project:
78
+ rubygems_version: 1.3.5
79
+ signing_key:
80
+ specification_version: 3
81
+ summary: HTML to Markdown converter
82
+ test_files: []
83
+