mislav-remark 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +18 -6
- data/Rakefile +2 -2
- data/lib/remark.rb +18 -3
- data/spec/remark_spec.rb +26 -0
- data/spec/sample.html +8 -0
- metadata +2 -2
data/README.markdown
CHANGED
@@ -1,17 +1,29 @@
|
|
1
|
-
Remark
|
2
|
-
|
1
|
+
Remark — HTML→Markdown tool
|
2
|
+
===========================
|
3
3
|
|
4
|
-
|
4
|
+
<i>Remark</i> parses HTML and delivers proper Markdown.
|
5
5
|
|
6
6
|
Usage
|
7
7
|
-----
|
8
8
|
|
9
9
|
From command-line:
|
10
10
|
|
11
|
-
|
11
|
+
remark path/to/file.html
|
12
|
+
|
13
|
+
or by STDIN:
|
14
|
+
|
15
|
+
echo "..." | remark
|
16
|
+
|
17
|
+
You can try feeding it a document from the web:
|
18
|
+
|
19
|
+
curl -s daringfireball.net/projects/markdown/basics | remark > result.markdown
|
12
20
|
|
13
|
-
|
21
|
+
See how it does.
|
22
|
+
|
23
|
+
If you've cloned the repository, invoke the binary like this:
|
24
|
+
|
25
|
+
ruby -Ilib -rubygems bin/remark spec/sample.html
|
14
26
|
|
15
|
-
|
27
|
+
And this is how you use it from Ruby code:
|
16
28
|
|
17
29
|
Remark.new('<h1>My document</h1><p>Some content</p>').to_markdown
|
data/Rakefile
CHANGED
@@ -8,8 +8,8 @@ task :gemspec do
|
|
8
8
|
gem.authors = ["Mislav Marohnić"]
|
9
9
|
gem.has_rdoc = false
|
10
10
|
|
11
|
-
gem.version = '0.1.0'
|
12
|
-
gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*']
|
11
|
+
gem.version = '0.2.0'
|
12
|
+
gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files`.split("\n")
|
13
13
|
gem.executables = Dir['bin/*'].map { |f| File.basename(f) }
|
14
14
|
end
|
15
15
|
|
data/lib/remark.rb
CHANGED
@@ -6,7 +6,20 @@ class Remark
|
|
6
6
|
end
|
7
7
|
|
8
8
|
def to_markdown
|
9
|
-
remark_children(
|
9
|
+
remark_children(scope).join("\n\n")
|
10
|
+
end
|
11
|
+
|
12
|
+
def scope
|
13
|
+
if body = @doc.at('/html/body')
|
14
|
+
candidates = (body / 'p').inject(Hash.new(0)) do |memo, para|
|
15
|
+
memo[para.parent] += 1
|
16
|
+
memo
|
17
|
+
end.invert
|
18
|
+
|
19
|
+
candidates[candidates.keys.max]
|
20
|
+
else
|
21
|
+
@doc
|
22
|
+
end
|
10
23
|
end
|
11
24
|
|
12
25
|
IGNORE = %w(script head style)
|
@@ -35,7 +48,7 @@ class Remark
|
|
35
48
|
|
36
49
|
def remark_item(item)
|
37
50
|
if item.text?
|
38
|
-
item.to_s.gsub(/\n+/, ' ') unless item.to_s =~
|
51
|
+
item.to_s.gsub(/\n+/, ' ') unless item.to_s =~ /\A\s*\Z/
|
39
52
|
elsif item.elem?
|
40
53
|
if IGNORE.include?(item.name)
|
41
54
|
nil
|
@@ -71,6 +84,8 @@ class Remark
|
|
71
84
|
'!' + remark_link(elem.attributes['alt'], elem.attributes['src'], elem.attributes['title'])
|
72
85
|
when 'blockquote'
|
73
86
|
remark_children(elem).join("\n\n").gsub(/^/, '> ')
|
87
|
+
when 'br'
|
88
|
+
' ' + elem.inner_html
|
74
89
|
else
|
75
90
|
elem
|
76
91
|
end
|
@@ -82,7 +97,7 @@ class Remark
|
|
82
97
|
end
|
83
98
|
|
84
99
|
def remark_inline(elem)
|
85
|
-
remark_children(elem).join('')
|
100
|
+
remark_children(elem).join('').gsub(/\s{2,}/, ' ')
|
86
101
|
end
|
87
102
|
|
88
103
|
def remark_list(list)
|
data/spec/remark_spec.rb
CHANGED
@@ -92,5 +92,31 @@ describe Remark do
|
|
92
92
|
remark("<img src='moo.jpg' alt='cow'>").should == '![cow](moo.jpg)'
|
93
93
|
remark("<img src='moo.jpg' alt='cow' width='16'>").should == '<img src="moo.jpg" alt="cow" width="16" />'
|
94
94
|
end
|
95
|
+
|
96
|
+
it "should not have BR ruin all the fun" do
|
97
|
+
remark("<p>Foo<br>bar</p>").should == 'Foo bar'
|
98
|
+
remark("<p>Foo<br>\nbar <code>baz</code></p>").should == 'Foo bar `baz`'
|
99
|
+
end
|
100
|
+
|
101
|
+
it "should scope to the most likely element that holds content" do
|
102
|
+
remark(<<-HTML).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
|
103
|
+
<html>
|
104
|
+
<body>
|
105
|
+
<div id="div1">
|
106
|
+
<p>Only 1 paragraph</p>
|
107
|
+
</div>
|
108
|
+
<div id="div3">
|
109
|
+
<p>Wow, 3 paragraphs</p>
|
110
|
+
<p>This must be where the content is</p>
|
111
|
+
<p>I'm sure</p>
|
112
|
+
</div>
|
113
|
+
<div id="div2">
|
114
|
+
<p>Only 2 paragraphs</p>
|
115
|
+
<p>How disappointing</p>
|
116
|
+
</div>
|
117
|
+
</body>
|
118
|
+
</html>
|
119
|
+
HTML
|
120
|
+
end
|
95
121
|
end
|
96
122
|
|
data/spec/sample.html
CHANGED
@@ -29,6 +29,14 @@ Markdown doesn't have a syntax for them.</p>
|
|
29
29
|
<pre><code>And who would forget
|
30
30
|
Preformatted code blocks :)</code></pre>
|
31
31
|
|
32
|
+
<p>Notice how it handles <img src="moo.jpg" alt="images" title="Awesum img"> in a nice way.</p>
|
33
|
+
|
34
|
+
<blockquote>
|
35
|
+
<p>I think</p>
|
36
|
+
|
37
|
+
<p>therefore I am</p>
|
38
|
+
</blockquote>
|
39
|
+
|
32
40
|
<h2>TODO</h2>
|
33
41
|
|
34
42
|
<p>Remark should probably support BR elements in paragraphs,<br>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mislav-remark
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- "Mislav Marohni\xC4\x87"
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-06-
|
12
|
+
date: 2009-06-24 00:00:00 -07:00
|
13
13
|
default_executable: remark
|
14
14
|
dependencies: []
|
15
15
|
|