RubyGems - devcenter-parser - Versions diffs - 1.3.9 → 1.4.0 - Mend

devcenter-parser 1.3.9 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +5 -5
data/README.md +7 -1
data/lib/devcenter-parser.rb +24 -75
data/lib/devcenter-parser/github_parser.rb +35 -0
data/lib/devcenter-parser/header_id_generator.rb +69 -0
data/lib/devcenter-parser/maruku_parser.rb +23 -0
data/lib/devcenter-parser/version.rb +2 -2
data/test/devcenter-parser_test.rb +11 -35
data/test/header_id_generator_test.rb +63 -0
metadata +14 -9

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: c86b19b7db5df1fbf3ff75efcf6e8c5e820d4144
-  data.tar.gz: 005abc3e6957539c3c459748fae88dcc839338c0
+!binary "U0hBMQ==":
+  metadata.gz: 731e954adb885153b1d9f5cecb351e93af1c249d
+  data.tar.gz: bc8bca7b5de1f10ff4400b43da93c93b299bf352
 SHA512:
-  metadata.gz: be7ebb71b4c901ed408ca12a52772abfd4ea2dbd12cf4ba24d8f57c0e4198e04e2685c9e2f484dc06c44ba12a4047d089cf1b7ef9f418cfd27d5cecfa9c3ccea
-  data.tar.gz: e3440ec9fcd6918dec57d4e16942a4d1ed98a871699ecc1afbe2f027b26f3593f38b30357ca31b496bf399556468639831e1d7ae7f246a92ac4538960adb15df
+  metadata.gz: dbfa585bb7106530d36e94166f845acd49c96c952fab64a1a96e89eb0fc3d76772ff6537357047388d40596e264132d96eaa7d2c1a7f479c6d8f70676317b4da
+  data.tar.gz: 443d781a6d537f9612f8b8dcb4809a8d766f31c362f9826ecccb4c56892f11654a89a54c7461027405c5598afbe4ad25b012f3eeea41f1a67b40e2f901310627

data/README.md CHANGED

@@ -20,8 +20,14 @@ Usage:
     end
 ```
+Test:
+```bash
+$ rake
+```
 ## License
 See the LICENSE file included in the distribution.
 ## Copyright
-Copyright (C) 2013 Heroku <raul@heroku.com>.
+Copyright (C) 2013 Heroku <raul@heroku.com>.

data/lib/devcenter-parser.rb CHANGED

@@ -3,45 +3,29 @@ require 'redcarpet'
 require 'nokogiri'
 require 'uri'
 require 'sanitize'
+require 'ostruct'
+require_relative './devcenter-parser/header_id_generator'
+require_relative './devcenter-parser/github_parser'
+require_relative './devcenter-parser/maruku_parser'
 module DevcenterParser
   AVAILABLE_FLAVOURS = [:github, :maruku]
   class InvalidMarkdownError < Exception; end
   class InvalidRawHTMLError < Exception; end
   class UnknownFlavourError < Exception; end
-  class HTMLWithPantsRenderer < Redcarpet::Render::HTML
-    include Redcarpet::Render::SmartyPants
-  end
   def self.to_html(markdown, flavour)
     html = to_unsanitized_html(markdown, flavour.to_sym)
     sanitize(html)
   end
   def self.to_unsanitized_html(markdown, flavour)
-    markdown = normalize_newlines(markdown.to_s)
-    markdown = separate_consecutive_blockquote_blocks(markdown)
-    doc = case flavour.to_sym
-          when :maruku
-            html = Maruku.new(markdown, :on_error => :raise).to_html
-            doc = Nokogiri::HTML::DocumentFragment.parse(html)
-            maruku_code_blocks(doc)
-            maruku_underscores_to_dashes_in_subheader_anchors(doc)
-          when :github
-            html = github_parser.render(markdown.to_s)
-            doc = Nokogiri::HTML::DocumentFragment.parse(html)
-            github_parse_special_blocks(doc)
-            github_underscores_to_dashes_in_subheader_anchors(doc)
-          else
-            raise UnknownFlavourError, "Markdown flavour '#{flavour}' not supported"
-          end
-    convert_to_article_links_all_relative_links_with_missing_initial_slashes(doc)
-    html = doc.to_html(:encoding => 'utf-8')
-    verify_raw_html(html)
-    html
+    raise(UnknownFlavourError, "Markdown flavour '#{flavour}' not supported") unless %w{ maruku github }.include?(flavour.to_s)
+    markdown = normalize_markdown(markdown)
+    markdown_parser = flavour.to_s == 'maruku' ? MarukuParser : GitHubParser
+    doc = markdown_parser.parse(markdown)
+    doc_to_html(doc)
   rescue InvalidRawHTMLError => e
     raise InvalidMarkdownError, e.message
   rescue => e
@@ -54,9 +38,22 @@ module DevcenterParser
   private
+  # Shared post-processing for a parsed document: applies HeaderIdGenerator,
+  # passes the document through
+  # convert_to_article_links_all_relative_links_with_missing_initial_slashes,
+  # serialises it as UTF-8 HTML and hands that HTML to verify_raw_html
+  # before returning it.
+  def self.doc_to_html(doc)
+    HeaderIdGenerator.apply!(doc)
+    convert_to_article_links_all_relative_links_with_missing_initial_slashes(doc)
+    doc.to_html(:encoding => 'utf-8').tap { |html| verify_raw_html(html) }
+  end
+  # Coerces +markdown+ to a String, runs it through normalize_newlines and
+  # then through separate_consecutive_blockquote_blocks, returning the result.
+  def self.normalize_markdown(markdown)
+    separate_consecutive_blockquote_blocks(normalize_newlines(markdown.to_s))
+  end
   # The current parsers consider something like:
   # > foo
-  #
+  #
   # > bar
   # as a single blockquote, while we want it to be two different ones.
   # This method adds an empty paragraph between consecutive blocks so parsers process them separately
@@ -69,10 +66,6 @@ module DevcenterParser
     markdown.lines.map{ |l| l.rstrip }.join("\n")
   end
-  def self.github_parser
-    @@github_parser ||= Redcarpet::Markdown.new(HTMLWithPantsRenderer, fenced_code_blocks: true, tables: true)
-  end
   def self.sanitize_config
     return @@sanitize_config if defined?(@@sanitize_config)
     config = Sanitize::Config::RELAXED
@@ -92,50 +85,6 @@ module DevcenterParser
     @@sanitize_config = config.merge({remove_contents: true, allow_comments: true})
   end
-  def self.maruku_code_blocks(doc)
-    doc.css('pre>code').each do |node|
-      if match = node.content.match(/\A\s*:::\s*(\w+)/)
-        lang = match[1]
-        node.content = node.content.gsub(/\A\s*:::\s*\w+\n/, '')
-        node['class'] = lang
-      end
-    end
-    doc
-  end
-  def self.maruku_underscores_to_dashes_in_subheader_anchors(doc)
-    doc.css("h2,h3,h4,h5,h6").each do |node|
-      node['id'] = subheader_id(node.content)
-    end
-    doc
-  end
-  def self.github_underscores_to_dashes_in_subheader_anchors(doc)
-    doc.css("h2,h3,h4,h5,h6").each do |node|
-      node['id'] = subheader_id(node.content)
-    end
-    doc
-  end
-  def self.subheader_id(content)
-    content.to_s.downcase.gsub(/\W+/, '-').gsub(/\A-+|-+\Z/, '')
-  end
-  def self.github_parse_special_blocks(doc)
-    doc.css('blockquote>p:first').each do |node|
-      if match = node.inner_html.match(/\A\W*(callout|warning|note)\W/)
-        node.parent.name = 'div'
-        node.parent['class'] = match[1]
-        new_html = node.inner_html.gsub(/\A\W*(callout|warning|note)\W/, '')
-        # Assigning inner_html directly causes encoding issues in old libxml versions,
-        # workaround from https://github.com/sparklemotion/nokogiri/issues/458#issuecomment-3136620
-        node.children = Nokogiri::HTML.fragment(new_html, 'utf-8')
-      end
-    end
-  end
   def self.convert_to_article_links_all_relative_links_with_missing_initial_slashes(doc)
     doc.css('a').each do |node|
       unless node['href'].nil? || node['href'] =~ /\Ahttp|\A\/|\Amailto\:|\A#/
@@ -164,4 +113,4 @@ module DevcenterParser
     broken_html = html.match(/REXML could not parse this XML\/HTML\:(.+)<\/pre>/m)[1].strip rescue nil
     broken_html.nil? ? "Contains broken raw HTML." : "This raw HTML is invalid: #{CGI.unescapeHTML(broken_html)}"
   end
-end
+end

data/lib/devcenter-parser/github_parser.rb ADDED

@@ -0,0 +1,35 @@
+# Parser for the :github markdown flavour: renders with Redcarpet and
+# post-processes the resulting HTML as a Nokogiri fragment.
+module GitHubParser
+  extend self
+  # Redcarpet HTML renderer with Redcarpet's SmartyPants module mixed in.
+  class HTMLWithPantsRenderer < Redcarpet::Render::HTML
+    include Redcarpet::Render::SmartyPants
+  end
+  # Renders +markdown+ (coerced with to_s) and returns the parsed
+  # Nokogiri::HTML::DocumentFragment after special_blocks has been applied.
+  def self.parse(markdown)
+    fragment = Nokogiri::HTML::DocumentFragment.parse(github_parser.render(markdown.to_s))
+    special_blocks(fragment)
+    fragment
+  end
+  # NOTE: a bare `private` does not affect methods defined with `def self.`,
+  # so the helpers below are still publicly callable.
+  private
+  # Builds the Redcarpet parser on first use and memoizes it in a class variable.
+  def self.github_parser
+    @@github_parser ||= Redcarpet::Markdown.new(HTMLWithPantsRenderer, fenced_code_blocks: true, tables: true)
+  end
+  # For every node matched by 'blockquote>p:first' whose HTML starts with
+  # (optional non-word characters and) "callout", "warning" or "note" followed
+  # by a non-word character: renames the parent to <div>, sets that keyword as
+  # its class and strips the marker from the paragraph.
+  def self.special_blocks(doc)
+    doc.css('blockquote>p:first').each do |paragraph|
+      marker = paragraph.inner_html.match(/\A\W*(callout|warning|note)\W/)
+      next unless marker
+      container = paragraph.parent
+      container.name = 'div'
+      container['class'] = marker[1]
+      stripped_html = paragraph.inner_html.gsub(/\A\W*(callout|warning|note)\W/, '')
+      # Assigning inner_html directly causes encoding issues in old libxml versions,
+      # workaround from https://github.com/sparklemotion/nokogiri/issues/458#issuecomment-3136620
+      paragraph.children = Nokogiri::HTML.fragment(stripped_html, 'utf-8')
+    end
+  end
+end

data/lib/devcenter-parser/header_id_generator.rb ADDED

@@ -0,0 +1,69 @@
+# Assigns an `id` attribute to every h2-h6 heading of the given doc.
+#
+# Ids are calculated from the heading text (lowercased, runs of non-word
+# characters collapsed into dashes) and de-duplicated in two passes:
+#
+# 1. headings whose id clashes with another heading get the text of their
+#    parent headings prepended (an h3 "B" below an h2 "A" becomes "a-b");
+# 2. headings that still clash get a 1-based counter appended ("a-b-1", "a-b-2").
+#
+# Any `id` a heading already carries is overwritten.
+class HeaderIdGenerator
+  # Generates the ids for +doc+, mutating its heading nodes in place.
+  #
+  # doc - object responding to +css+ and returning the heading nodes; each node
+  #       must respond to +name+, +content+ and +[]=+.
+  #
+  # Returns the HeaderIdGenerator instance that did the work.
+  def self.apply!(doc)
+    new(doc)
+  end
+  # Does all the work: collects the headings, computes unique ids and writes
+  # them to the nodes.
+  def initialize(doc)
+    @doc = doc
+    @header_nodes = @doc.css("h2,h3,h4,h5,h6").to_a
+    # { node -> id } hash, in the order the headings were returned by css
+    @nodes_ids = @header_nodes.each_with_object({}) { |node, ids| ids[node] = nil }
+    add_default_ids
+    prepend_parents_on_conflicts
+    append_numbers_on_conflicts
+    @nodes_ids.each { |node, id| node['id'] = id }
+  end
+  private
+  # Recomputes the id of every clashing heading from the text of its parent
+  # headings plus its own text.
+  # "Parent" does not mean DOM nesting here: headings are siblings in the
+  # markup (<h2></h2> ... <h3></h3>), so the parent of a heading is the closest
+  # preceding heading of a higher level.
+  def prepend_parents_on_conflicts
+    conflicts(@nodes_ids).each_key do |node|
+      contents = parent_header_nodes(node).map { |parent_node| parent_node.content }
+      contents << node.content
+      @nodes_ids[node] = subheader_id(contents.join(' '))
+    end
+  end
+  # Returns, from the highest level down, the closest preceding heading for
+  # each parent level of +node+. Levels without a preceding heading are
+  # skipped instead of yielding nil.
+  def parent_header_nodes(node)
+    # take(index) is empty for the first heading, whereas a 0..index-1 range
+    # would wrap around to -1 and select the whole list.
+    preceding = @header_nodes.take(@header_nodes.index(node))
+    parents = parent_tags(node.name).map do |parent_tag|
+      preceding.reverse.find { |candidate| candidate.name == parent_tag }
+    end
+    parents.compact
+  end
+  # "h4" -> ["h2", "h3"]
+  def parent_tags(tag)
+    level = tag.delete('h').to_i
+    (2...level).map { |n| "h#{n}" }
+  end
+  # Appends "-1", "-2", ... to the ids that are still shared by several headings.
+  def append_numbers_on_conflicts
+    conflicts(@nodes_ids).group_by { |_node, id| id }.each do |id, id_conflicts|
+      id_conflicts.each_with_index do |(node, _id), n|
+        @nodes_ids[node] = "#{id}-#{n + 1}"
+      end
+    end
+  end
+  # Returns the { node -> id } entries of +hash+ whose id is used more than once.
+  def conflicts(hash)
+    counts = Hash.new(0)
+    hash.each_value { |id| counts[id] += 1 }
+    hash.select { |_node, id| counts[id] > 1 }
+  end
+  # First pass: every heading gets the id calculated from its own text.
+  def add_default_ids
+    @header_nodes.each { |node| @nodes_ids[node] = subheader_id(node.content) }
+  end
+  # "Foo##Bar$-" -> "foo-bar"
+  def subheader_id(content)
+    content.to_s.downcase.gsub(/\W+/, '-').gsub(/\A-+|-+\Z/, '')
+  end
+end

data/lib/devcenter-parser/maruku_parser.rb ADDED

@@ -0,0 +1,23 @@
+# Parser for the :maruku markdown flavour: renders with Maruku and
+# post-processes the resulting HTML as a Nokogiri fragment.
+module MarukuParser
+  extend self
+  # Renders +markdown+ with Maruku (configured with :on_error => :raise) and
+  # returns the parsed Nokogiri::HTML::DocumentFragment after code_blocks has
+  # been applied.
+  def self.parse(markdown)
+    fragment = Nokogiri::HTML::DocumentFragment.parse(Maruku.new(markdown, :on_error => :raise).to_html)
+    code_blocks(fragment)
+    fragment
+  end
+  # NOTE: a bare `private` does not affect methods defined with `def self.`,
+  # so the helper below is still publicly callable.
+  private
+  # For every 'pre>code' node whose content starts with a ":::lang" marker:
+  # removes the marker line from the content and sets `lang` as the node's
+  # class. Returns +doc+.
+  def self.code_blocks(doc)
+    doc.css('pre>code').each do |code_node|
+      marker = code_node.content.match(/\A\s*:::\s*(\w+)/)
+      next unless marker
+      code_node.content = code_node.content.gsub(/\A\s*:::\s*\w+\n/, '')
+      code_node['class'] = marker[1]
+    end
+    doc
+  end
+end

data/lib/devcenter-parser/version.rb CHANGED

@@ -1,3 +1,3 @@
 module DevcenterParser
-  VERSION = '1.3.9'
-end
+  VERSION = '1.4.0'
+end

data/test/devcenter-parser_test.rb CHANGED

@@ -5,7 +5,7 @@ require_relative '../lib/devcenter-parser'
 describe 'DevcenterParser' do
   describe '.to_unsanitized_html' do
     it 'returns empty string for nil input' do
       assert_parsing_unsanitized_result nil, :maruku, ''
       assert_parsing_unsanitized_result nil, :maruku, ''
@@ -13,7 +13,7 @@ describe 'DevcenterParser' do
     it 'maintains script tags' do
       md = '<script>alert("hi")</script>'
-      assert_parsing_unsanitized_result md, :maruku, '<script><![CDATA[alert("hi")]]></script>'
+      assert_parsing_unsanitized_result md, :maruku, '<script>alert("hi")</script>'
       assert_parsing_unsanitized_result md, :github, '<script>alert("hi")</script>'
     end
@@ -33,13 +33,6 @@ describe 'DevcenterParser' do
       end
     end
-    it 'respects existing ids' do
-      md = '<strong id="foo">clean</strong>'
-      html = '<p><strong id="foo">clean</strong></p>'
-      assert_maruku_result md, html
-      assert_github_result md, html
-    end
     it 'removes script tags and their content' do
       md = '<strong>clean<script>alert("hack!")</script></strong>'
       html = '<p><strong>clean</strong></p>'
@@ -47,28 +40,6 @@ describe 'DevcenterParser' do
       assert_github_result md, html
     end
-    it 'includes ids in subheaders' do
-      md = <<-MARKDOWN
-## Foo Bar Header 123
-Foo bar content
-    MARKDOWN
-      assert DevcenterParser.to_html(md, :github).include?('<h2 id="foo-bar-header-123">Foo Bar Header 123</h2>')
-      assert DevcenterParser.to_html(md, :maruku).include?('<h2 id="foo-bar-header-123">Foo Bar Header 123</h2>')
-    end
-    it 'generates ids replacing inner non-alphanum chars with dashes' do
-      ['Foo Bar', 'Foo-Bar', 'Foo#bar', 'Foo##Bar', 'Foo##Bar', '-$Foo##Bar$-'].each do |title|
-        md = <<-MARKDOWN
-## #{title}
-Foo bar content
-MARKDOWN
-      assert DevcenterParser.to_html(md, :github).include?("<h2 id=\"foo-bar\">#{title}</h2>"), "GitHub with title #{title}: " + DevcenterParser.to_html(md, :github)
-      assert DevcenterParser.to_html(md, :maruku).include?("<h2 id=\"foo-bar\">#{title}</h2>"), "Maruku: " + DevcenterParser.to_html(md, :maruku)
-      end
-    end
     describe 'github markdown' do
       it 'generates apostrophes from single quotes in plain text' do
@@ -241,7 +212,7 @@ Testing
 > callout
 > **strong**
-> more callout
+> more callout
 > normal
@@ -253,7 +224,7 @@ Testing
 >callout
 >**strong**
->more callout
+>more callout
 >normal
@@ -312,7 +283,7 @@ more callout</p>
 |  A  |  B  |
 | --- | --- |
 |  1  |  2  |
-|  3  |  4  |
+|  3  |  4  |
       MARKDOWN
       html = <<-HTML
@@ -430,4 +401,9 @@ more callout</p>
     assert_equal expected.strip, result.strip, "Failed when parsing on unsanitized mode\n#{md}\nwith the #{flavour} flavour.\n\nExpected:\n#{expected}\n\nActual result:\n#{result}\n\n"
   end
-end
+  # Asserts that both the :github and the :maruku flavour render +md+ with an
+  # opening +header+ tag carrying the given +id+.
+  def assert_header_id(md, header, id)
+    opening_tag = "<#{header} id=\"#{id}\">"
+    assert DevcenterParser.to_html(md, :github).include?(opening_tag), "GitHub does not generate a #{header} with id #{id}"
+    assert DevcenterParser.to_html(md, :maruku).include?(opening_tag), "Maruku does not generate a #{header} with id #{id}"
+  end
+end

data/test/header_id_generator_test.rb ADDED

@@ -0,0 +1,63 @@
+require 'minitest/autorun'
+require 'nokogiri'
+require_relative '../lib/devcenter-parser/header_id_generator'
+# Specs for HeaderIdGenerator: ids calculated from the heading text and
+# de-duplication of clashing ids (parent prefixes first, then counters).
+describe 'HeaderIdGeneratorTest' do
+  # Only h2-h6 nodes are touched; other elements keep the id they came with.
+  it 'respects existing ids in non-header elements' do
+    html = '<strong id="foo">clean</strong>'
+    assert_equal html, result(html)
+  end
+  it 'inserts ids in subheaders' do
+    html = '<h2>Foo Bar Header 123</h2>'
+    assert_id result(html), 'h2', 'foo-bar-header-123'
+  end
+  # Every title below is expected to collapse to the same "foo-bar" id.
+  it 'generates ids replacing inner non-alphanum chars with dashes' do
+    ['Foo Bar', 'Foo-Bar', 'Foo#bar', 'Foo##Bar', 'Foo##Bar', '-$Foo##Bar$-'].each do |title|
+      html = "<h2>#{title}</h2>"
+      assert_id result(html), 'h2', 'foo-bar'
+    end
+  end
+  describe 'ensures that there are not collisions between ids in subheaders' do
+    # Same h3/h4 titles under different h2s: the parents' text disambiguates them.
+    it 'by prepending the id of the previous H2 if possible' do
+      html = <<-HTML
+<h2>A</h2>
+  <h3>B</h3>
+    <h4>Z</h4>
+<h2>C</h2>
+  <h3>B</h3>
+    <h4>Z</h4>
+      HTML
+      # `result(html)` with parentheses calls the helper method even though a
+      # local variable of the same name is being assigned.
+      result = result(html)
+      %w{ a c }.each{ |id| assert_id(result, 'h2', id) }
+      %w{ a-b c-b }.each{ |id| assert_id(result, 'h3', id) }
+      %w{ a-b-z c-b-z }.each{ |id| assert_id(result, 'h4', id) }
+    end
+    # Same titles under the same parent: only a counter can disambiguate them.
+    it 'by appending numbers for those subheaders with same nesting level and parent header name' do
+      html = <<-HTML
+<h2>A</h2>
+  <h3>B</h3>
+  <h3>B</h3>
+<h2>C</h2>
+<h2>C</h2>
+      HTML
+      result = result(html)
+      %w{ a c-1 c-2 }.each{ |id| assert_id(result, 'h2', id) }
+      %w{ a-b-1 a-b-2 }.each{ |id| assert_id(result, 'h3', id) }
+    end
+  end
+  # Asserts that +html+ contains an opening +tag+ whose only attribute is the given +id+.
+  def assert_id(html, tag, id)
+    assert html.include?("<#{tag} id=\"#{id}\">"), "<#{tag} id=\"#{id}\"> not found"
+  end
+  # Parses +html+ as a Nokogiri fragment, runs HeaderIdGenerator on it and
+  # returns the fragment serialised back to HTML.
+  def result(html)
+    doc = Nokogiri::HTML::DocumentFragment.parse(html)
+    HeaderIdGenerator.new(doc)
+    doc.to_html
+  end
+end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: devcenter-parser
 version: !ruby/object:Gem::Version
-  version: 1.3.9
+  version: 1.4.0
 platform: ruby
 authors:
 - Heroku
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-11-07 00:00:00.000000000 Z
+date: 2014-03-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: maruku
@@ -28,14 +28,14 @@ dependencies:
   name: nokogiri
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - '>='
+    - - ! '>='
       - !ruby/object:Gem::Version
         version: 1.4.4
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - '>='
+    - - ! '>='
       - !ruby/object:Gem::Version
         version: 1.4.4
 - !ruby/object:Gem::Dependency
@@ -70,14 +70,14 @@ dependencies:
   name: minitest
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - '>'
+    - - ! '>'
       - !ruby/object:Gem::Version
         version: '2.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - '>'
+    - - ! '>'
       - !ruby/object:Gem::Version
         version: '2.0'
 description: Parser for Heroku Dev Center's content
@@ -92,8 +92,12 @@ files:
 - README.md
 - devcenter-parser.gemspec
 - lib/devcenter-parser.rb
+- lib/devcenter-parser/github_parser.rb
+- lib/devcenter-parser/header_id_generator.rb
+- lib/devcenter-parser/maruku_parser.rb
 - lib/devcenter-parser/version.rb
 - test/devcenter-parser_test.rb
+- test/header_id_generator_test.rb
 homepage: https://devcenter.heroku.com
 licenses: []
 metadata: {}
@@ -103,19 +107,20 @@ require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - '>='
+  - - ! '>='
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.3
+rubygems_version: 2.2.2
 signing_key:
 specification_version: 4
 summary: Parser for Heroku Dev Center's content
 test_files:
 - test/devcenter-parser_test.rb
+- test/header_id_generator_test.rb