mislav-remark 0.1.0 → 0.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/README.markdown CHANGED
@@ -1,17 +1,29 @@
1
- Remark
2
- ======
1
+ Remark — HTML→Markdown tool
2
+ ===========================
3
3
 
4
- A Ruby tool that parses HTML and delivers proper Markup.
4
+ <i>Remark</i> parses HTML and delivers proper Markdown.
5
5
 
6
6
  Usage
7
7
  -----
8
8
 
9
9
  From command-line:
10
10
 
11
- ruby -Ilib -rubygems bin/remark spec/sample.html
11
+ remark path/to/file.html
12
+
13
+ or by STDIN:
14
+
15
+ echo "..." | remark
16
+
17
+ You can try feeding it a document from the web:
18
+
19
+ curl -s daringfireball.net/projects/markdown/basics | remark > result.markdown
12
20
 
13
- (You can also give input to STDIN instead as file argument.)
21
+ See how it does.
22
+
23
+ If you've cloned the repository, invoke the binary like this:
24
+
25
+ ruby -Ilib -rubygems bin/remark spec/sample.html
14
26
 
15
- From Ruby code:
27
+ And this is how you use it from Ruby code:
16
28
 
17
29
  Remark.new('<h1>My document</h1><p>Some content</p>').to_markdown
data/Rakefile CHANGED
@@ -8,8 +8,8 @@ task :gemspec do
8
8
  gem.authors = ["Mislav Marohnić"]
9
9
  gem.has_rdoc = false
10
10
 
11
- gem.version = '0.1.0'
12
- gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*']
11
+ gem.version = '0.2.0'
12
+ gem.files = FileList['Rakefile', '{bin,lib,rails,spec}/**/*', 'README*', 'LICENSE*'] & `git ls-files`.split("\n")
13
13
  gem.executables = Dir['bin/*'].map { |f| File.basename(f) }
14
14
  end
15
15
 
data/lib/remark.rb CHANGED
@@ -6,7 +6,20 @@ class Remark
6
6
  end
7
7
 
8
8
  def to_markdown
9
- remark_children(@doc).join("\n\n")
9
+ remark_children(scope).join("\n\n")
10
+ end
11
+
12
+ def scope
13
+ if body = @doc.at('/html/body')
14
+ candidates = (body / 'p').inject(Hash.new(0)) do |memo, para|
15
+ memo[para.parent] += 1
16
+ memo
17
+ end.invert
18
+
19
+ candidates[candidates.keys.max]
20
+ else
21
+ @doc
22
+ end
10
23
  end
11
24
 
12
25
  IGNORE = %w(script head style)
@@ -35,7 +48,7 @@ class Remark
35
48
 
36
49
  def remark_item(item)
37
50
  if item.text?
38
- item.to_s.gsub(/\n+/, ' ') unless item.to_s =~ /^\s*$/
51
+ item.to_s.gsub(/\n+/, ' ') unless item.to_s =~ /\A\s*\Z/
39
52
  elsif item.elem?
40
53
  if IGNORE.include?(item.name)
41
54
  nil
@@ -71,6 +84,8 @@ class Remark
71
84
  '!' + remark_link(elem.attributes['alt'], elem.attributes['src'], elem.attributes['title'])
72
85
  when 'blockquote'
73
86
  remark_children(elem).join("\n\n").gsub(/^/, '> ')
87
+ when 'br'
88
+ ' ' + elem.inner_html
74
89
  else
75
90
  elem
76
91
  end
@@ -82,7 +97,7 @@ class Remark
82
97
  end
83
98
 
84
99
  def remark_inline(elem)
85
- remark_children(elem).join('')
100
+ remark_children(elem).join('').gsub(/\s{2,}/, ' ')
86
101
  end
87
102
 
88
103
  def remark_list(list)
data/spec/remark_spec.rb CHANGED
@@ -92,5 +92,31 @@ describe Remark do
92
92
  remark("<img src='moo.jpg' alt='cow'>").should == '![cow](moo.jpg)'
93
93
  remark("<img src='moo.jpg' alt='cow' width='16'>").should == '<img src="moo.jpg" alt="cow" width="16" />'
94
94
  end
95
+
96
+ it "should not have BR ruin all the fun" do
97
+ remark("<p>Foo<br>bar</p>").should == 'Foo bar'
98
+ remark("<p>Foo<br>\nbar <code>baz</code></p>").should == 'Foo bar `baz`'
99
+ end
100
+
101
+ it "should scope to the most likely element that holds content" do
102
+ remark(<<-HTML).should == "Wow, 3 paragraphs\n\nThis must be where the content is\n\nI'm sure"
103
+ <html>
104
+ <body>
105
+ <div id="div1">
106
+ <p>Only 1 paragraph</p>
107
+ </div>
108
+ <div id="div3">
109
+ <p>Wow, 3 paragraphs</p>
110
+ <p>This must be where the content is</p>
111
+ <p>I'm sure</p>
112
+ </div>
113
+ <div id="div2">
114
+ <p>Only 2 paragraphs</p>
115
+ <p>How disappointing</p>
116
+ </div>
117
+ </body>
118
+ </html>
119
+ HTML
120
+ end
95
121
  end
96
122
 
data/spec/sample.html CHANGED
@@ -29,6 +29,14 @@ Markdown doesn't have a syntax for them.</p>
29
29
  <pre><code>And who would forget
30
30
  Preformatted code blocks :)</code></pre>
31
31
 
32
+ <p>Notice how it handles <img src="moo.jpg" alt="images" title="Awesum img"> in a nice way.</p>
33
+
34
+ <blockquote>
35
+ <p>I think</p>
36
+
37
+ <p>therefore I am</p>
38
+ </blockquote>
39
+
32
40
  <h2>TODO</h2>
33
41
 
34
42
  <p>Remark should probably support BR elements in paragraphs,<br>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mislav-remark
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - "Mislav Marohni\xC4\x87"
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-23 00:00:00 -07:00
12
+ date: 2009-06-24 00:00:00 -07:00
13
13
  default_executable: remark
14
14
  dependencies: []
15
15