devcenter-parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over TLS; a plain-http source lets gem downloads be tampered with
# in transit.
source "https://rubygems.org"

# Runtime and development dependencies are declared in the gemspec.
gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 Heroku
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,27 @@
1
+ # devcenter-parser
2
+
3
+ Markdown parser used by [Heroku Dev Center](https://devcenter.heroku.com).
4
+
5
+ Usage:
6
+
7
+ ```ruby
8
+ require 'devcenter-parser'
9
+
10
+ md = '[Dev Center](https://devcenter.heroku.com)'
11
+ flavour = :github # :github or :maruku
12
+ DevcenterParser.to_html(md, flavour)
13
+ # => "<p><a href=\"https://devcenter.heroku.com\">Dev Center</a></p>"
14
+
15
+ broken_md = '[foo](bar'
16
+ begin
17
+ DevcenterParser.to_html(broken_md, :maruku)
18
+ rescue DevcenterParser::InvalidMarkdownError => e
19
+ puts e.message # parser-dependent (sometimes cryptic) debugging info
20
+ end
21
+ ```
22
+
23
+ ## License
24
+ See the LICENSE file included in the distribution.
25
+
26
+ ## Copyright
27
+ Copyright (C) 2013 Heroku <raul@heroku.com>.
@@ -0,0 +1,23 @@
1
# Gem specification for devcenter-parser.
#
# Every path is resolved relative to this file instead of the current working
# directory, so the specification can be loaded from any directory.
root = File.expand_path('..', __FILE__)
lib  = File.join(root, 'lib')
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

# Loaded only to read DevcenterParser::VERSION.
require File.join(lib, 'devcenter-parser')

Gem::Specification.new do |gem|
  gem.name          = "devcenter-parser"
  gem.version       = DevcenterParser::VERSION
  gem.authors       = ["Raul Murciano"]
  gem.email         = ["raul@heroku.com"]
  gem.homepage      = "https://devcenter.heroku.com"
  gem.summary       = "Parser for Heroku Dev Center's content"
  gem.description   = "Parser for Heroku Dev Center's content"
  # The LICENSE file shipped with the gem contains the MIT license text.
  gem.license       = "MIT"

  # Ask git for the tracked files from the project root, wherever we were
  # invoked from.
  gem.files         = Dir.chdir(root) { `git ls-files`.split($/) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = %w{ lib }

  gem.add_runtime_dependency 'maruku'
  gem.add_runtime_dependency 'nokogiri'
  gem.add_runtime_dependency 'redcarpet'
  gem.add_runtime_dependency 'sanitize'

  gem.add_development_dependency 'minitest', '>2.0'
end
@@ -0,0 +1,132 @@
1
require 'cgi'
require 'uri'

require 'maruku'
require 'redcarpet'
require 'nokogiri'
require 'sanitize'
6
+
7
# Converts markdown into HTML with either the Maruku or the GitHub (Redcarpet)
# flavour, post-processes the generated markup (code block languages,
# subheader anchors, callout blocks) and optionally sanitizes the result.
module DevcenterParser

  VERSION = '1.0.0'

  # Markdown flavours accepted by .to_html and .to_unsanitized_html.
  AVAILABLE_FLAVOURS = [:github, :maruku].freeze

  # Leading keyword that turns a GitHub-flavoured blockquote into a special block.
  SPECIAL_BLOCK_PATTERN = /\A\W*(callout|warning|note)\W/

  # Captures the raw HTML embedded in a "REXML could not parse" error block.
  RAW_HTML_ERROR_PATTERN = /REXML could not parse this XML\/HTML\:(.+)<\/pre>/m

  # Leading "|   " and trailing "EOF" decoration stripped from error report lines.
  MARUKU_ERROR_DECORATION = /\A\|(\s)+|EOF\Z/

  # The errors derive from StandardError so that a plain `rescue => e` in
  # calling code handles them; rescuing them by class works exactly as before.
  class InvalidMarkdownError < StandardError; end
  class InvalidRawHTMLError < StandardError; end
  class UnknownFlavourError < StandardError; end

  # Renders markdown and sanitizes the resulting HTML.
  #
  # markdown - markdown source to render.
  # flavour  - :github or :maruku (anything responding to #to_sym).
  #
  # Returns the sanitized HTML.
  # Raises InvalidMarkdownError when the markdown cannot be rendered and
  # UnknownFlavourError when the flavour is not supported.
  def self.to_html(markdown, flavour)
    html = to_unsanitized_html(markdown, flavour.to_sym)
    sanitize(html)
  end

  # Renders markdown without sanitizing the output, so raw HTML such as script
  # tags is kept.
  #
  # markdown - markdown source to render.
  # flavour  - :github or :maruku (anything responding to #to_sym).
  #
  # Returns the generated HTML as a UTF-8 string.
  # Raises InvalidMarkdownError for any failure while rendering and
  # UnknownFlavourError when the flavour is not supported.
  def self.to_unsanitized_html(markdown, flavour)
    doc = case flavour.to_sym
          when :maruku
            rendered = Maruku.new(markdown, :on_error => :raise).to_html
            fragment = Nokogiri::HTML::DocumentFragment.parse(rendered)
            maruku_code_blocks(fragment)
            maruku_underscores_to_dashes_in_subheader_anchors(fragment)
          when :github
            rendered = github_parser.render(markdown.to_s)
            fragment = Nokogiri::HTML::DocumentFragment.parse(rendered)
            github_parse_special_blocks(fragment)
            github_underscores_to_dashes_in_subheader_anchors(fragment)
          else
            raise UnknownFlavourError, "Markdown flavour '#{flavour}' not supported"
          end
    html = doc.to_html(:encoding => 'utf-8')
    verify_raw_html(html)
    html
  rescue UnknownFlavourError
    # Must reach the caller untouched instead of being wrapped by the generic
    # handler below.
    raise
  rescue InvalidRawHTMLError => e
    raise InvalidMarkdownError, e.message
  rescue => e
    raise InvalidMarkdownError, parse_maruku_error(e.message)
  end

  # Cleans the given HTML with Sanitize using .sanitize_config.
  def self.sanitize(html)
    Sanitize.clean(html, sanitize_config)
  end

  # Internal helpers. A bare `private` does not apply to `def self.` methods,
  # so everything below is (and has always been) publicly callable; it is kept
  # that way for backward compatibility.

  # Memoized Redcarpet renderer with fenced code blocks enabled.
  def self.github_parser
    @github_parser ||= Redcarpet::Markdown.new(Redcarpet::Render::HTML, fenced_code_blocks: true)
  end

  # Memoized Sanitize configuration: Sanitize's RELAXED config extended with
  # layout attributes, extra elements and embedded-video tags.
  #
  # The configuration is built as a copy: Sanitize::Config::RELAXED is shared
  # by every user of Sanitize in the process and is never modified here.
  def self.sanitize_config
    @sanitize_config ||= begin
      relaxed    = Sanitize::Config::RELAXED
      attributes = relaxed[:attributes].dup
      attributes[:all] = Array(attributes[:all]) +
                         %w{ id class style name width height border align } +
                         # embedded videos
                         %w{ value src type allowscriptaccess allowfullscreen }
      attributes['a'] = Array(attributes['a']) + %w{ target }

      relaxed.merge(
        :attributes      => attributes,
        :elements        => relaxed[:elements] + %w{ div span hr tt } + %w{ object param embed },
        # Forced onto embedded-video tags regardless of what the markup says.
        :add_attributes  => {
          'object' => {'allowscriptaccess' => 'never'},
          'embed'  => {'allowscriptaccess' => 'never'},
          'param'  => {'allowscriptaccess' => 'never'}
        },
        :remove_contents => true,
        :allow_comments  => true
      )
    end
  end

  # Moves a leading ":::lang" marker of each <pre><code> block into the code
  # element's class attribute. Returns doc.
  def self.maruku_code_blocks(doc)
    doc.css('pre>code').each do |node|
      if match = node.content.match(/\A\s*:::\s*(\w+)/)
        lang = match[1]
        node.content = node.content.sub(/\A\s*:::\s*\w+\n/, '')
        node['class'] = lang
      end
    end
    doc
  end

  # Replaces runs of underscores with a dash in the ids of h2-h6 elements that
  # already have an id. Returns doc.
  def self.maruku_underscores_to_dashes_in_subheader_anchors(doc)
    doc.css("h2,h3,h4,h5,h6").each do |node|
      id = node['id']
      node['id'] = id.gsub(/_+/, '-') if id
    end
    doc
  end

  # Gives every h2-h6 element an id derived from its text: downcased, with each
  # non-word character replaced by a dash. Returns doc.
  def self.github_underscores_to_dashes_in_subheader_anchors(doc)
    doc.css("h2,h3,h4,h5,h6").each do |node|
      node['id'] = node.content.to_s.downcase.gsub(/\W/, '-')
    end
    doc
  end

  # Turns blockquotes whose first paragraph starts with "callout", "warning" or
  # "note" into <div class="..."> blocks and removes the keyword from the text.
  # The return value is not used by .to_unsanitized_html.
  def self.github_parse_special_blocks(doc)
    doc.css('blockquote>p:first').each do |node|
      if match = node.inner_html.match(SPECIAL_BLOCK_PATTERN)
        node.parent.name = 'div'
        node.parent['class'] = match[1]
        node.inner_html = node.inner_html.sub(SPECIAL_BLOCK_PATTERN, '')
      end
    end
  end

  # Raises InvalidRawHTMLError when the HTML contains a raw HTML error marker.
  def self.verify_raw_html(html)
    raise(InvalidRawHTMLError, parse_raw_html_error(html)) if invalid_raw_html?(html)
  end

  # True when the HTML contains the 'markdown-html-error' marker.
  def self.invalid_raw_html?(html)
    html.to_s.include?('markdown-html-error')
  end

  # Condenses a multi-line error report into 'message in "code"', taking the
  # message from the fifth line and the offending code from the seventh.
  #
  # Messages that do not have that layout (single-line or short messages coming
  # from other errors) are returned unchanged as a String, so the original
  # error text is never lost or replaced by a failure in this method.
  def self.parse_maruku_error(error_message)
    message = error_message.to_s
    lines = message.split("\n")
    msg_line, code_line = lines[4], lines[6]
    return message if msg_line.nil? || code_line.nil?

    msg  = msg_line.gsub(MARUKU_ERROR_DECORATION, '').strip
    code = code_line.gsub(MARUKU_ERROR_DECORATION, '').strip
    "#{msg} in \"#{code}\""
  end

  # Builds the message for an InvalidRawHTMLError, quoting the broken raw HTML
  # (HTML-unescaped) when it can be located in the generated markup.
  def self.parse_raw_html_error(html)
    match = html.to_s.match(RAW_HTML_ERROR_PATTERN)
    return "Contains broken raw HTML." if match.nil?

    "This raw HTML is invalid: #{CGI.unescapeHTML(match[1].strip)}"
  end
end
@@ -0,0 +1,207 @@
1
+ require 'minitest/autorun'
2
+ require_relative '../lib/devcenter-parser'
3
+
4
+
5
describe 'DevcenterParser' do

  # Assertion helpers shared by the examples below.

  # Asserts that md renders to expected (ignoring surrounding whitespace)
  # through DevcenterParser.to_html with the given flavour.
  def assert_parsing_result(md, flavour, expected)
    actual = DevcenterParser.to_html(md, flavour)
    failure = "Failed when parsing\n#{md}\nwith the #{flavour} flavour.\n\nExpected:\n#{expected}\n\nActual result:\n#{actual}\n\n"
    assert_equal(expected.strip, actual.strip, failure)
  end

  # Same as assert_parsing_result, but through the unsanitized renderer.
  def assert_parsing_unsanitized_result(md, flavour, expected)
    actual = DevcenterParser.to_unsanitized_html(md, flavour)
    failure = "Failed when parsing on unsanitized mode\n#{md}\nwith the #{flavour} flavour.\n\nExpected:\n#{expected}\n\nActual result:\n#{actual}\n\n"
    assert_equal(expected.strip, actual.strip, failure)
  end

  def assert_maruku_result(md, expected)
    assert_parsing_result(md, :maruku, expected)
  end

  def assert_github_result(md, expected)
    assert_parsing_result(md, :github, expected)
  end

  def assert_all_flavours_result(md, expected)
    [:github, :maruku].each do |flavour|
      assert_parsing_result(md, flavour, expected)
    end
  end

  describe '.to_unsanitized_html' do
    it 'maintains script tags' do
      script = '<script>alert("hi")</script>'
      [:maruku, :github].each do |flavour|
        assert_parsing_unsanitized_result(script, flavour, script)
      end
    end
  end

  describe '.to_html' do

    it 'raises InvalidMarkdownError when parsing invalid markdown' do
      unclosed_link = '[foo](bar'
      assert_raises(DevcenterParser::InvalidMarkdownError) do
        DevcenterParser.to_html(unclosed_link, :maruku)
      end
    end

    it 'respects existing ids' do
      markup = '<strong id="foo">clean</strong>'
      assert_maruku_result(markup, '<strong id="foo">clean</strong>')
    end

    it 'removes script tags and their content' do
      markup = '<strong>clean<script>alert("hack!")</script></strong>'
      assert_maruku_result(markup, '<strong>clean</strong>')
    end

    it 'github markdown includes ids in subheaders' do
      md = <<-MARKDOWN
## Foo Bar Header 123

Foo bar content
      MARKDOWN
      rendered = DevcenterParser.to_html(md, :github)
      assert rendered.include?('<h2 id="foo-bar-header-123">Foo Bar Header 123</h2>')
    end

    it 'maruku markdown includes ids in subheaders' do
      md = <<-MARKDOWN
## Foo Bar Header 123

Foo bar content
      MARKDOWN
      rendered = DevcenterParser.to_html(md, :maruku)
      assert rendered.include?('<h2 id="foo-bar-header-123">Foo Bar Header 123</h2>')
    end

    it 'github markdown supports regular block quotes without callout|warning|note' do
      # Neither opening line is a special-block keyword followed by a non-word
      # character, so both quotes must stay regular blockquotes.
      ['not a callout', 'calloutnonono'].each do |opening|
        md = <<-MARKDOWN
Testing

> #{opening}
> **strong**
> normal

And that's it.
        MARKDOWN

        html = <<-HTML
<p>Testing</p>

<blockquote>
<p>#{opening}
<strong>strong</strong>
normal</p>
</blockquote>

<p>And that's it.</p>
        HTML

        assert_github_result(md, html)
      end
    end

    it 'github markdown supports "> callout" and ">callout" and parses inner markdown' do
      spaced = <<-MARKDOWN
Testing

> callout
> **strong**
> normal

And that's it.
      MARKDOWN

      compact = <<-MARKDOWN
Testing

>callout
>**strong**
>normal

And that's it.
      MARKDOWN

      html = <<-HTML
<p>Testing</p>

<div class="callout">
<p><strong>strong</strong>
normal</p>
</div>

<p>And that's it.</p>
      HTML

      [spaced, compact].each { |md| assert_github_result(md, html) }
    end

    it 'github markdown supports "> callout" and ">callout", parses inner markdown and allows paragraphs' do
      spaced = <<-MARKDOWN
Testing

> callout
> **strong**

> normal

And that's it.
      MARKDOWN

      compact = <<-MARKDOWN
Testing

>callout
>**strong**

>normal

And that's it.
      MARKDOWN

      html = <<-HTML
<p>Testing</p>

<div class="callout">
<p><strong>strong</strong></p>

<p>normal</p>
</div>

<p>And that's it.</p>
      HTML

      [spaced, compact].each { |md| assert_github_result(md, html) }
    end

  end

end
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: devcenter-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Raul Murciano
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-05-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: maruku
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: nokogiri
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: redcarpet
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: sanitize
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: minitest
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>'
84
+ - !ruby/object:Gem::Version
85
+ version: '2.0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>'
92
+ - !ruby/object:Gem::Version
93
+ version: '2.0'
94
+ description: Parser for Heroku Dev Center's content
95
+ email:
96
+ - raul@heroku.com
97
+ executables: []
98
+ extensions: []
99
+ extra_rdoc_files: []
100
+ files:
101
+ - Gemfile
102
+ - LICENSE
103
+ - README.md
104
+ - devcenter-parser.gemspec
105
+ - lib/devcenter-parser.rb
106
+ - test/devcenter-parser_test.rb
107
+ homepage: https://devcenter.heroku.com
108
+ licenses: []
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ! '>='
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ none: false
121
+ requirements:
122
+ - - ! '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ requirements: []
126
+ rubyforge_project:
127
+ rubygems_version: 1.8.23
128
+ signing_key:
129
+ specification_version: 3
130
+ summary: Parser for Heroku Dev Center's content
131
+ test_files:
132
+ - test/devcenter-parser_test.rb