devcenter-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
# Fetch gems over HTTPS so they cannot be tampered with in transit.
source "https://rubygems.org"

# Take the dependency list from the gemspec.
gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2013 Heroku
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,27 @@
1
+ # devcenter-parser
2
+
3
+ Markdown parser used by [Heroku Dev Center](https://devcenter.heroku.com).
4
+
5
+ Usage:
6
+
7
+ ```ruby
8
+ require 'devcenter-parser'
9
+
10
+ md = '[Dev Center](https://devcenter.heroku.com)'
11
+ flavour = :github # :github or :maruku
12
+ DevcenterParser.to_html(md, flavour)
13
+ # => "<p><a href=\"https://devcenter.heroku.com\">Dev Center</a></p>"
14
+
15
+ broken_md = '[foo](bar'
16
+ begin
17
+ DevcenterParser.to_html(broken_md, :maruku)
18
+ rescue DevcenterParser::InvalidMarkdownError => e
19
+ puts e.message # parser-dependent (sometimes cryptic) debugging info
20
+ end
21
+ ```
22
+
23
+ ## License
24
+ See the LICENSE file included in the distribution.
25
+
26
+ ## Copyright
27
+ Copyright (C) 2013 Heroku <raul@heroku.com>.
@@ -0,0 +1,23 @@
1
# Anchor every path to this file instead of the current working directory, so
# the gemspec evaluates the same way no matter where it is invoked from.
root = File.expand_path('..', __FILE__)
$:.unshift root
# The library is loaded only to read DevcenterParser::VERSION.
require File.join(root, 'lib', 'devcenter-parser')

Gem::Specification.new do |gem|
  gem.name          = "devcenter-parser"
  gem.version       = DevcenterParser::VERSION
  gem.authors       = ["Raul Murciano"]
  gem.email         = ["raul@heroku.com"]
  gem.homepage      = "https://devcenter.heroku.com"
  gem.summary       = "Parser for Heroku Dev Center's content"
  gem.description   = "Parser for Heroku Dev Center's content"
  # The bundled LICENSE file carries the MIT license text.
  gem.license       = "MIT"

  # Package whatever git tracks; run git from the project root so the list
  # does not depend on the caller's working directory.
  gem.files         = Dir.chdir(root) { `git ls-files`.split($/) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = %w{ lib }

  gem.add_runtime_dependency 'maruku'
  gem.add_runtime_dependency 'nokogiri'
  gem.add_runtime_dependency 'redcarpet'
  gem.add_runtime_dependency 'sanitize'

  gem.add_development_dependency 'minitest', '>2.0'
end
@@ -0,0 +1,132 @@
1
require 'cgi'
require 'uri'

require 'maruku'
require 'nokogiri'
require 'redcarpet'
require 'sanitize'
6
+
7
# Converts markdown into HTML using either the Maruku or the GitHub
# (Redcarpet) flavour, and optionally sanitizes the result.
module DevcenterParser

  VERSION = '1.0.0'

  AVAILABLE_FLAVOURS = [:github, :maruku].freeze

  # Leading marker that turns a GitHub-flavoured blockquote into a special block.
  SPECIAL_BLOCK_MARKER = /\A\W*(callout|warning|note)\W/

  # Decoration stripped from the lines picked out of a multi-line parser error.
  PARSER_ERROR_NOISE = /\A\|(\s)+|EOF\Z/

  # Errors derive from StandardError so that a plain `rescue` in calling code
  # can handle them; rescuing by class name keeps working as before.
  class InvalidMarkdownError < StandardError; end
  class InvalidRawHTMLError < StandardError; end
  class UnknownFlavourError < StandardError; end

  # Renders markdown as sanitized HTML.
  #
  # @param markdown [String] markdown source
  # @param flavour [Symbol, String] :github or :maruku
  # @return [String] sanitized HTML
  # @raise [InvalidMarkdownError] when the markdown or its raw HTML is invalid
  # @raise [UnknownFlavourError] when the flavour is not supported
  def self.to_html(markdown, flavour)
    sanitize(to_unsanitized_html(markdown, flavour))
  end

  # Renders markdown as HTML without sanitizing it.
  #
  # @param markdown [String] markdown source
  # @param flavour [Symbol, String] :github or :maruku
  # @return [String] UTF-8 encoded HTML
  # @raise [InvalidMarkdownError] when rendering fails or the output contains
  #   the 'markdown-html-error' marker
  # @raise [UnknownFlavourError] when the flavour is not supported
  def self.to_unsanitized_html(markdown, flavour)
    doc = case normalize_flavour(flavour)
    when :maruku
      rendered = Maruku.new(markdown, :on_error => :raise).to_html
      fragment = Nokogiri::HTML::DocumentFragment.parse(rendered)
      maruku_code_blocks(fragment)
      maruku_underscores_to_dashes_in_subheader_anchors(fragment)
    when :github
      rendered = github_parser.render(markdown.to_s)
      fragment = Nokogiri::HTML::DocumentFragment.parse(rendered)
      github_parse_special_blocks(fragment)
      github_underscores_to_dashes_in_subheader_anchors(fragment)
    else
      raise UnknownFlavourError, "Markdown flavour '#{flavour}' not supported"
    end
    html = doc.to_html(:encoding => 'utf-8')
    verify_raw_html(html)
    html
  rescue UnknownFlavourError
    # Must not be reported as a markdown problem by the generic rescue below.
    raise
  rescue InvalidRawHTMLError => e
    raise InvalidMarkdownError, e.message
  rescue => e
    raise InvalidMarkdownError, parse_maruku_error(e.message)
  end

  # Cleans HTML according to sanitize_config.
  #
  # @param html [String]
  # @return [String]
  def self.sanitize(html)
    Sanitize.clean(html, sanitize_config)
  end

  # The helpers below are internal. A bare `private` does not apply to
  # `def self.` methods, so they have always been publicly callable and are
  # left that way for backward compatibility.

  # Turns the flavour into a Symbol when possible. Values that cannot be
  # converted (e.g. nil) are returned untouched so the caller reports them as
  # an unknown flavour instead of failing with NoMethodError.
  def self.normalize_flavour(flavour)
    flavour.respond_to?(:to_sym) ? flavour.to_sym : flavour
  end

  # Memoized Redcarpet renderer with fenced code blocks enabled.
  def self.github_parser
    @github_parser ||= Redcarpet::Markdown.new(Redcarpet::Render::HTML, fenced_code_blocks: true)
  end

  # Memoized Sanitize configuration derived from Sanitize::Config::RELAXED.
  # The configuration is built on copies: the shared RELAXED constant is never
  # modified, so this also works when that constant is frozen.
  #
  # @return [Hash]
  def self.sanitize_config
    @sanitize_config ||= begin
      relaxed = Sanitize::Config::RELAXED

      attributes = relaxed[:attributes].dup
      attributes[:all] = Array(attributes[:all]) +
        %w{ id class style name width height border align } +
        # embedded videos
        %w{ value src type allowscriptaccess allowfullscreen }
      attributes['a'] = Array(attributes['a']) + %w{ target }

      # `object param embed` are allowed for embedded videos
      elements = Array(relaxed[:elements]) + %w{ div span hr tt } + %w{ object param embed }

      relaxed.merge(
        attributes: attributes,
        elements: elements,
        add_attributes: {
          'object' => {'allowscriptaccess' => 'never'},
          'embed' => {'allowscriptaccess' => 'never'},
          'param' => {'allowscriptaccess' => 'never'}
        },
        remove_contents: true,
        allow_comments: true
      )
    end
  end

  # For each `pre > code` node whose content starts with `:::lang`, sets the
  # node's class to `lang`. The marker line is removed only when it is
  # followed by a newline.
  #
  # @return the given doc
  def self.maruku_code_blocks(doc)
    doc.css('pre>code').each do |node|
      match = node.content.match(/\A\s*:::\s*(\w+)/)
      next unless match
      node.content = node.content.sub(/\A\s*:::\s*\w+\n/, '')
      node['class'] = match[1]
    end
    doc
  end

  # Replaces runs of underscores with a dash in existing h2-h6 ids.
  #
  # @return the given doc
  def self.maruku_underscores_to_dashes_in_subheader_anchors(doc)
    doc.css("h2,h3,h4,h5,h6").each do |node|
      id = node['id']
      node['id'] = id.gsub(/_+/, '-') if id
    end
    doc
  end

  # Gives every h2-h6 an id built from its text: downcased, with each
  # non-word character replaced by a dash.
  #
  # @return the given doc
  def self.github_underscores_to_dashes_in_subheader_anchors(doc)
    doc.css("h2,h3,h4,h5,h6").each do |node|
      node['id'] = node.content.to_s.downcase.gsub(/\W/, '-')
    end
    doc
  end

  # Converts blockquotes whose first paragraph starts with callout, warning or
  # note into a div with that class, and removes the marker from the paragraph.
  #
  # @return the given doc, like the other document helpers
  def self.github_parse_special_blocks(doc)
    doc.css('blockquote>p:first').each do |node|
      inner = node.inner_html
      match = inner.match(SPECIAL_BLOCK_MARKER)
      next unless match
      node.parent.name = 'div'
      node.parent['class'] = match[1]
      node.inner_html = inner.sub(SPECIAL_BLOCK_MARKER, '')
    end
    doc
  end

  # @raise [InvalidRawHTMLError] when the html carries the raw HTML error marker
  def self.verify_raw_html(html)
    raise(InvalidRawHTMLError, parse_raw_html_error(html)) if invalid_raw_html?(html)
  end

  # @return [Boolean] whether the html contains 'markdown-html-error'
  def self.invalid_raw_html?(html)
    html.to_s.include?('markdown-html-error')
  end

  # Condenses a multi-line parser error into `<message> in "<code>"`, using
  # the fifth and seventh lines of the error text. Messages that do not have
  # those lines are returned unchanged, always as a String.
  #
  # @param error_message [#to_s]
  # @return [String]
  def self.parse_maruku_error(error_message)
    message = error_message.to_s
    lines = message.split("\n")
    msg, code = lines[4], lines[6]
    return message unless msg && code
    "#{msg.gsub(PARSER_ERROR_NOISE, '').strip} in \"#{code.gsub(PARSER_ERROR_NOISE, '').strip}\""
  end

  # Builds the error message for invalid raw HTML, quoting the offending
  # snippet when it can be found in the html.
  #
  # @param html [#to_s]
  # @return [String]
  def self.parse_raw_html_error(html)
    broken_html = html.to_s[/REXML could not parse this XML\/HTML\:(.+)<\/pre>/m, 1]
    return "Contains broken raw HTML." if broken_html.nil?
    "This raw HTML is invalid: #{CGI.unescapeHTML(broken_html.strip)}"
  end
end
@@ -0,0 +1,207 @@
1
+ require 'minitest/autorun'
2
+ require_relative '../lib/devcenter-parser'
3
+
4
+
5
# Specs for DevcenterParser. Heredoc bodies are deliberately kept at column 0
# because markdown is sensitive to leading whitespace.
describe 'DevcenterParser' do

  describe '.to_unsanitized_html' do
    it 'maintains script tags' do
      md = '<script>alert("hi")</script>'
      assert_parsing_unsanitized_result md, :maruku, md
      assert_parsing_unsanitized_result md, :github, '<script>alert("hi")</script>'
    end
  end

  describe '.to_html' do

    it 'raises InvalidMarkdownError when parsing invalid markdown' do
      md = '[foo](bar'
      assert_raises DevcenterParser::InvalidMarkdownError do
        DevcenterParser.to_html(md, :maruku)
      end
    end

    it 'raises UnknownFlavourError when the flavour is not supported' do
      assert_raises DevcenterParser::UnknownFlavourError do
        DevcenterParser.to_html('foo', :unknown)
      end
    end

    it 'respects existing ids' do
      md = '<strong id="foo">clean</strong>'
      assert_maruku_result md, '<strong id="foo">clean</strong>'
    end

    it 'removes script tags and their content' do
      md = '<strong>clean<script>alert("hack!")</script></strong>'
      assert_maruku_result md, '<strong>clean</strong>'
    end

    it 'github markdown includes ids in subheaders' do
      md = <<-MARKDOWN
## Foo Bar Header 123

Foo bar content
      MARKDOWN
      # assert_includes reports both strings on failure, unlike a bare assert.
      assert_includes DevcenterParser.to_html(md, :github), '<h2 id="foo-bar-header-123">Foo Bar Header 123</h2>'
    end

    it 'maruku markdown includes ids in subheaders' do
      md = <<-MARKDOWN
## Foo Bar Header 123

Foo bar content
      MARKDOWN
      assert_includes DevcenterParser.to_html(md, :maruku), '<h2 id="foo-bar-header-123">Foo Bar Header 123</h2>'
    end

    it 'github markdown supports regular block quotes without callout|warning|note' do
      md = <<-MARKDOWN
Testing

> not a callout
> **strong**
> normal

And that's it.
      MARKDOWN

      html = <<-HTML
<p>Testing</p>

<blockquote>
<p>not a callout
<strong>strong</strong>
normal</p>
</blockquote>

<p>And that's it.</p>
      HTML

      assert_github_result(md, html)

      # A word that merely starts with "callout" must not trigger the special block.
      md = <<-MARKDOWN
Testing

> calloutnonono
> **strong**
> normal

And that's it.
      MARKDOWN

      html = <<-HTML
<p>Testing</p>

<blockquote>
<p>calloutnonono
<strong>strong</strong>
normal</p>
</blockquote>

<p>And that's it.</p>
      HTML

      assert_github_result(md, html)
    end

    it 'github markdown supports "> callout" and ">callout" and parses inner markdown' do
      mds = []
      mds << <<-MARKDOWN
Testing

> callout
> **strong**
> normal

And that's it.
      MARKDOWN

      mds << <<-MARKDOWN
Testing

>callout
>**strong**
>normal

And that's it.
      MARKDOWN

      html = <<-HTML
<p>Testing</p>

<div class="callout">
<p><strong>strong</strong>
normal</p>
</div>

<p>And that's it.</p>
      HTML

      mds.each do |md|
        assert_github_result(md, html)
      end
    end

    it 'github markdown supports "> callout" and ">callout", parses inner markdown and allows paragraphs' do
      mds = []
      mds << <<-MARKDOWN
Testing

> callout
> **strong**

> normal

And that's it.
      MARKDOWN

      mds << <<-MARKDOWN
Testing

>callout
>**strong**

>normal

And that's it.
      MARKDOWN

      html = <<-HTML
<p>Testing</p>

<div class="callout">
<p><strong>strong</strong></p>

<p>normal</p>
</div>

<p>And that's it.</p>
      HTML

      mds.each do |md|
        assert_github_result(md, html)
      end
    end

  end

  # helpers

  # Asserts that every flavour renders md as expected.
  def assert_all_flavours_result(md, expected)
    [:github, :maruku].each { |flavour| assert_parsing_result(md, flavour, expected) }
  end

  # Asserts the sanitized Maruku rendering of md.
  def assert_maruku_result(md, expected)
    assert_parsing_result md, :maruku, expected
  end

  # Asserts the sanitized GitHub rendering of md.
  def assert_github_result(md, expected)
    assert_parsing_result md, :github, expected
  end

  # Compares the sanitized rendering with expected, ignoring surrounding whitespace.
  def assert_parsing_result(md, flavour, expected)
    result = DevcenterParser.to_html(md, flavour)
    assert_equal expected.strip, result.strip, "Failed when parsing\n#{md}\nwith the #{flavour} flavour.\n\nExpected:\n#{expected}\n\nActual result:\n#{result}\n\n"
  end

  # Compares the unsanitized rendering with expected, ignoring surrounding whitespace.
  def assert_parsing_unsanitized_result(md, flavour, expected)
    result = DevcenterParser.to_unsanitized_html(md, flavour)
    assert_equal expected.strip, result.strip, "Failed when parsing on unsanitized mode\n#{md}\nwith the #{flavour} flavour.\n\nExpected:\n#{expected}\n\nActual result:\n#{result}\n\n"
  end

end
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: devcenter-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Raul Murciano
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-05-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: maruku
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: nokogiri
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: redcarpet
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: sanitize
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: minitest
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>'
84
+ - !ruby/object:Gem::Version
85
+ version: '2.0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>'
92
+ - !ruby/object:Gem::Version
93
+ version: '2.0'
94
+ description: Parser for Heroku Dev Center's content
95
+ email:
96
+ - raul@heroku.com
97
+ executables: []
98
+ extensions: []
99
+ extra_rdoc_files: []
100
+ files:
101
+ - Gemfile
102
+ - LICENSE
103
+ - README.md
104
+ - devcenter-parser.gemspec
105
+ - lib/devcenter-parser.rb
106
+ - test/devcenter-parser_test.rb
107
+ homepage: https://devcenter.heroku.com
108
+ licenses: []
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ! '>='
117
+ - !ruby/object:Gem::Version
118
+ version: '0'
119
+ required_rubygems_version: !ruby/object:Gem::Requirement
120
+ none: false
121
+ requirements:
122
+ - - ! '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ requirements: []
126
+ rubyforge_project:
127
+ rubygems_version: 1.8.23
128
+ signing_key:
129
+ specification_version: 3
130
+ summary: Parser for Heroku Dev Center's content
131
+ test_files:
132
+ - test/devcenter-parser_test.rb