mislav-remark 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown CHANGED
@@ -1,17 +1,29 @@
1
- Remark
2
- ======
1
+ Remark — HTML→Markdown tool
2
+ ===========================
3
3
 
4
- A Ruby tool that parses HTML and delivers proper Markup.
4
+ <i>Remark</i> parses HTML and delivers proper Markdown.
5
5
 
6
6
  Usage
7
7
  -----
8
8
 
9
9
  From command-line:
10
10
 
11
- ruby -Ilib -rubygems bin/remark spec/sample.html
11
+ remark path/to/file.html
12
+
13
+ or by STDIN:
14
+
15
+ echo "..." | remark
16
+
17
+ You can try feeding it a document from the web:
18
+
19
+ curl -s daringfireball.net/projects/markdown/basics | remark > result.markdown
12
20
 
13
- (You can also give input to STDIN instead as file argument.)
21
+ See how it does.
22
+
23
+ If you've cloned the repository, invoke the binary like this:
24
+
25
+ ruby -Ilib -rubygems bin/remark spec/sample.html
14
26
 
15
- From Ruby code:
27
+ And this is how you use it from Ruby code:
16
28
 
17
29
  Remark.new('<h1>My document</h1><p>Some content</p>').to_markdown
data/Rakefile CHANGED
@@ -8,8 +8,8 @@ task :gemspec do
8
8
  gem.authors = ["Mislav Marohnić"]
9
9
  gem.has_rdoc = false
10
10
 
11
- gem.version = '0.1.0'
12
- gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*']
11
+ gem.version = '0.2.0'
12
+ gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files`.split("\n")
13
13
  gem.executables = Dir['bin/*'].map { |f| File.basename(f) }
14
14
  end
15
15
 
data/lib/remark.rb CHANGED
@@ -6,7 +6,20 @@ class Remark
6
6
  end
7
7
 
8
8
  def to_markdown
9
- remark_children(@doc).join("\n\n")
9
+ remark_children(scope).join("\n\n")
10
+ end
11
+
12
+ def scope
13
+ if body = @doc.at('/html/body')
14
+ candidates = (body / 'p').inject(Hash.new(0)) do |memo, para|
15
+ memo[para.parent] += 1
16
+ memo
17
+ end.invert
18
+
19
+ candidates[candidates.keys.max]
20
+ else
21
+ @doc
22
+ end
10
23
  end
11
24
 
12
25
  IGNORE = %w(script head style)
@@ -35,7 +48,7 @@ class Remark
35
48
 
36
49
  def remark_item(item)
37
50
  if item.text?
38
- item.to_s.gsub(/\n+/, ' ') unless item.to_s =~ /^\s*$/
51
+ item.to_s.gsub(/\n+/, ' ') unless item.to_s =~ /\A\s*\Z/
39
52
  elsif item.elem?
40
53
  if IGNORE.include?(item.name)
41
54
  nil
@@ -71,6 +84,8 @@ class Remark
71
84
  '!' + remark_link(elem.attributes['alt'], elem.attributes['src'], elem.attributes['title'])
72
85
  when 'blockquote'
73
86
  remark_children(elem).join("\n\n").gsub(/^/, '> ')
87
+ when 'br'
88
+ ' ' + elem.inner_html
74
89
  else
75
90
  elem
76
91
  end
@@ -82,7 +97,7 @@ class Remark
82
97
  end
83
98
 
84
99
  def remark_inline(elem)
85
- remark_children(elem).join('')
100
+ remark_children(elem).join('').gsub(/\s{2,}/, ' ')
86
101
  end
87
102
 
88
103
  def remark_list(list)
data/spec/remark_spec.rb CHANGED
@@ -92,5 +92,31 @@ describe Remark do
92
92
  remark("<img src='moo.jpg' alt='cow'>").should == '![cow](moo.jpg)'
93
93
  remark("<img src='moo.jpg' alt='cow' width='16'>").should == '<img src="moo.jpg" alt="cow" width="16" />'
94
94
  end
95
+
96
+ it "should not have BR ruin all the fun" do
97
+ remark("<p>Foo<br>bar</p>").should == 'Foo bar'
98
+ remark("<p>Foo<br>\nbar <code>baz</code></p>").should == 'Foo bar `baz`'
99
+ end
100
+
101
+ it "should scope to the most likely element that holds content" do
102
+ remark(<<-HTML).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
103
+ <html>
104
+ <body>
105
+ <div id="div1">
106
+ <p>Only 1 paragraph</p>
107
+ </div>
108
+ <div id="div3">
109
+ <p>Wow, 3 paragraphs</p>
110
+ <p>This must be where the content is</p>
111
+ <p>I'm sure</p>
112
+ </div>
113
+ <div id="div2">
114
+ <p>Only 2 paragraphs</p>
115
+ <p>How disappointing</p>
116
+ </div>
117
+ </body>
118
+ </html>
119
+ HTML
120
+ end
95
121
  end
96
122
 
data/spec/sample.html CHANGED
@@ -29,6 +29,14 @@ Markdown doesn't have a syntax for them.</p>
29
29
  <pre><code>And who would forget
30
30
  Preformatted code blocks :)</code></pre>
31
31
 
32
+ <p>Notice how it handles <img src="moo.jpg" alt="images" title="Awesum img"> in a nice way.</p>
33
+
34
+ <blockquote>
35
+ <p>I think</p>
36
+
37
+ <p>therefore I am</p>
38
+ </blockquote>
39
+
32
40
  <h2>TODO</h2>
33
41
 
34
42
  <p>Remark should probably support BR elements in paragraphs,<br>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mislav-remark
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Mislav Marohni\xC4\x87"
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-23 00:00:00 -07:00
12
+ date: 2009-06-24 00:00:00 -07:00
13
13
  default_executable: remark
14
14
  dependencies: []
15
15