RubyGems - unmarkdown - Versions diffs - 0.1.0 - Mend

unmarkdown 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 89763c4c6a33be8a9d51bda83db646aa71966e06
+  data.tar.gz: 0b2588b7f7c9e5d15f6a082cc3a9752169679183
+SHA512:
+  metadata.gz: aa2ad64dfa026571241613e404c60fe216e218e85cdff364b35cd29531002cd9f4cc04e5ebc71c262e695f6244b58819b7eaf6e15ccc02886ab6a678fbbdc8e3
+  data.tar.gz: ad77913f14ec92cb9813aa3be5d14d36c23656388d5ba2c7fa0d104e99aac94114d6beac1c3d29baaab9bcdabbbb91fd003cc0b0d3cf27b0608cdb79faacd922

data/.gitignore ADDED Viewed

@@ -0,0 +1,17 @@
+*.gem
+*.rbc
+.bundle
+.config
+.yardoc
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp

data/Gemfile ADDED Viewed

@@ -0,0 +1,10 @@
+source 'https://rubygems.org'
+gemspec # also load the dependencies declared in the gem's .gemspec file
+gem 'rake'
+group :test do
+  gem 'minitest'
+  gem 'minitest-rg'
+end

data/LICENSE ADDED Viewed

@@ -0,0 +1,22 @@
+Copyright (c) 2013 Sam Soffes, http://soff.es
+MIT License
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/Rakefile ADDED Viewed

@@ -0,0 +1,8 @@
+require 'bundler/gem_tasks'
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |t|
+  t.libs << 'test' # put test/ on the load path so tests can require 'test_helper'
+  t.pattern = 'test/**/*_test.rb'
+end
+task default: :test # a bare `rake` runs the test task

data/Readme.markdown ADDED Viewed

@@ -0,0 +1,62 @@
+# Unmarkdown
+Convert HTML to Markdown with Ruby.
+## Installation
+Add this line to your application's Gemfile:
+``` ruby
+gem 'unmarkdown'
+```
+And then execute:
+    $ bundle
+Or install it yourself as:
+    $ gem install unmarkdown
+## Usage
+``` ruby
+markdown = Unmarkdown.parse('Some <strong>HTML</strong>')
+#=> Some **HTML**
+markdown = Unmarkdown.parse('My website is http://soff.es', autolink: true)
+#=> My website is <http://soff.es>
+```
+## Support
+### Supported tags
+* h1-h6
+* blockquote
+* ul, ol, li
+* pre
+* hr
+* a
+* em, i
+* strong, b
+* u
+* mark
+* code
+* img
+For tags that aren't supported, their content will be added to the output. Basically it treats everything like a `<p>`.
+### Options
+* `fenced_code_blocks` — Uses three backticks before and after instead of four spaces before each line
+* `allow_scripts` — By default, script tags are removed. If you set this option to `true` their original HTML will be included in the output
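+* `underline_headers` — By default number signs are added before headers. If you turn this option on, it will use equal signs for h1's or hyphens for h2's and the rest will remain number signs.
+* `autolink` — Wraps bare URLs and email addresses found in text in angle brackets (`<http://soff.es>`) so Markdown renders them as links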
+## Contributing
+1. Fork it ( http://github.com/soffes/unmarkdown/fork )
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request

data/lib/unmarkdown.rb ADDED Viewed

@@ -0,0 +1,12 @@
+require 'unmarkdown/version'
+require 'unmarkdown/parser'
+module Unmarkdown
+  module_function # lets parse be called directly on the module: Unmarkdown.parse(...)
+  # Takes an HTML string (plus an options hash that is handed to Parser.new) and returns a Markdown string
+  def parse(html, options = {})
+    Parser.new(html, options).parse
+  end
+end

data/lib/unmarkdown/parser.rb ADDED Viewed

@@ -0,0 +1,146 @@
+require 'nokogiri'
+module Unmarkdown
+  class Parser
+    BLOCK_ELEMENT_NAMES = %w{h1 h2 h3 h4 h5 h6 blockquote pre hr ul ol li p div}.freeze
+    AUTOLINK_URL_REGEX = /((?:https?|ftp):[^'"\s]+)/i.freeze
+    AUTOLINK_EMAIL_REGEX = %r{([-.\w]+\@[-a-z0-9]+(?:\.[-a-z0-9]+)*\.[a-z]+)}i.freeze
+    def initialize(html, options = {})
+      @html = html
+      @options = options
+    end
+    def parse
+      # Setup document
+      doc = Nokogiri::HTML(@html)
+      doc.encoding = 'UTF-8'
+      # Reset bookkeeping
+      @list = []
+      @list_position = []
+      # Parse the root node recursively
+      root_node = doc.xpath('//body')
+      markdown = parse_nodes(root_node.children)
+      # Strip whitespace
+      markdown.rstrip.gsub(/\n{2}+/, "\n\n")
+      # TODO: Strip trailing whitespace
+    end
+    private
+    # Parse the children of a node
+    def parse_nodes(nodes)
+      output = ''
+      # Short-circuit if it's empty
+      return output if !nodes || nodes.empty?
+      # Loop through nodes
+      nodes.each do |node|
+        case node.name
+        when 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'
+          level = node.name.match(/\Ah(\d)\Z/)[1].to_i
+          if @options[:underline_headers] && level < 3
+            content = parse_content(node)
+            output << content + "\n"
+            character = level == 1 ? '=' : '-'
+            content.length.times { output << character}
+          else
+            hashes = ''
+            level.times { hashes << '#' }
+            output << "#{hashes} #{parse_content(node)}"
+          end
+        when 'blockquote'
+          parse_content(node).split("\n").each do |line|
+            output << "> #{line}\n"
+          end
+        when 'ul', 'ol'
+          output << "\n\n" if @list.count > 0
+          if unordered = node.name == 'ul'
+            @list << :unordered
+          else
+            @list << :ordered
+            @list_position << 0
+          end
+          output << parse_nodes(node.children)
+          @list.pop
+          @list_position.pop unless unordered
+        when 'li'
+          (@list.count - 1).times { output << '    ' }
+          if @list.last == :unordered
+            output << "* #{parse_content(node)}"
+          else
+            num = (@list_position[@list_position.count - 1] += 1)
+            output << "#{num}. #{parse_content(node)}"
+          end
+        when 'pre'
+          content = parse_content(node)
+          if @options[:fenced_code_blocks]
+            output << "```\n#{content}\n```"
+          else
+            content.split("\n").each do |line|
+              output << "    #{line}\n"
+            end
+          end
+        when 'hr'
+          output << "---\n\n"
+        when 'a'
+          output << "[#{parse_content(node)}](#{node['href'] + build_title(node)})"
+        when 'i', 'em'
+          output << "*#{parse_content(node)}*"
+        when 'b', 'strong'
+          output << "**#{parse_content(node)}**"
+        when 'u'
+          output << "_#{parse_content(node)}_"
+        when 'mark'
+          output << "==#{parse_content(node)}=="
+        when 'code'
+          output << "`#{parse_content(node)}`"
+        when 'img'
+          output << "![#{node['alt']}](#{node['src'] + build_title(node)})"
+        when 'text'
+          content = parse_content(node)
+          # Optionally look for links
+          content.gsub!(AUTOLINK_URL_REGEX, '<\1>') if @options[:autolink]
+          content.gsub!(AUTOLINK_EMAIL_REGEX, '<\1>') if @options[:autolink]
+          output << content
+        when 'script'
+          next unless @options[:allow_scripts]
+          output << node.to_html
+        else
+          # If it's an supported node or a node that just contains text, just get
+          # its content
+          output << parse_content(node)
+        end
+        output << "\n\n" if BLOCK_ELEMENT_NAMES.include?(node.name)
+      end
+      output
+    end
+    # Get the content from a node
+    def parse_content(node)
+      content = if node.children.empty?
+        node.content
+      else
+        parse_nodes(node.children)
+      end
+    end
+    # Build the title for links or images
+    def build_title(node)
+      node['title'] ? %Q{ "#{node['title']}"} : ''
+    end
+  end
+end

data/lib/unmarkdown/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Unmarkdown
+  VERSION = '0.1.0' # version string of the gem
+end

data/test/parser_test.rb ADDED Viewed

@@ -0,0 +1,156 @@
+require 'test_helper'
+class ParserTest < Unmarkdown::Test
+  include Unmarkdown
+  def test_headers
+    6.times do |i|
+      i += 1
+      html = "<h#{i}>Header</h#{i}>"
+      markdown = ''
+      i.times { markdown << '#' }
+      markdown << ' Header'
+      assert_equal markdown, parse(html)
+    end
+    html = '<h1>Something Huge</h1>'
+    markdown = "Something Huge\n=============="
+    assert_equal markdown, parse(html, underline_headers: true)
+    html = '<h2>Something Smaller</h1>'
+    markdown = "Something Smaller\n-----------------"
+    assert_equal markdown, parse(html, underline_headers: true)
+  end
+  def test_blockquote
+    html = '<blockquote>Awesome.</blockquote>'
+    markdown = '> Awesome.'
+    assert_equal markdown, parse(html)
+  end
+  def test_unorder_list
+    html = '<ul><li>Ruby<ul><li>Gem</li><li>Stuff</li></ul></li><li>Objective-C</li></ul>'
+    markdown = "* Ruby\n\n    * Gem\n\n    * Stuff\n\n* Objective-C"
+    assert_equal markdown, parse(html)
+  end
+  def test_ordered_list
+    html = '<ol><li>Ruby<ol><li>Gem</li><li>Stuff</li></ol></li><li>Objective-C</li></ol>'
+    markdown = "1. Ruby\n\n    1. Gem\n\n    2. Stuff\n\n2. Objective-C"
+    assert_equal markdown, parse(html)
+  end
+  def test_code_block
+    html = "<pre>puts 'Hello world'</pre>"
+    markdown = "    puts 'Hello world'"
+    assert_equal markdown, parse(html)
+    html = "<pre>puts 'Hello world'</pre>"
+    markdown = "```\nputs 'Hello world'\n```"
+    assert_equal markdown, parse(html, fenced_code_blocks: true)
+  end
+  def test_line_break
+    html = '<hr>'
+    markdown = '---'
+    assert_equal markdown, parse(html)
+  end
+  def test_link
+    html = '<a href="http://soff.es">Sam Soffes</a>'
+    markdown = '[Sam Soffes](http://soff.es)'
+    assert_equal markdown, parse(html)
+    html = '<a href="http://soff.es" title="My site">Sam Soffes</a>'
+    markdown = '[Sam Soffes](http://soff.es "My site")'
+    assert_equal markdown, parse(html)
+  end
+  def test_emphasis
+    html = '<i>italic</i>'
+    markdown = '*italic*'
+    assert_equal markdown, parse(html)
+    html = '<em>italic</em>'
+    markdown = '*italic*'
+    assert_equal markdown, parse(html)
+  end
+  def test_double_emphasis
+    html = '<b>bold</b>'
+    markdown = '**bold**'
+    assert_equal markdown, parse(html)
+    html = '<strong>bold</strong>'
+    markdown = '**bold**'
+    assert_equal markdown, parse(html)
+  end
+  def test_triple_emphasis
+    html = '<b><i>bold italic</i></b>'
+    markdown = '***bold italic***'
+    assert_equal markdown, parse(html)
+  end
+  def test_underline
+    html = '<u>underline</u>'
+    markdown = '_underline_'
+    assert_equal markdown, parse(html)
+  end
+  def test_bold_underline
+    html = '<b><u>underline</u></b>'
+    markdown = '**_underline_**'
+    assert_equal markdown, parse(html)
+    html = '<u><b>underline</b></u>'
+    markdown = '_**underline**_'
+    assert_equal markdown, parse(html)
+  end
+  def test_mark
+    html = '<mark>highlighted</mark>'
+    markdown = '==highlighted=='
+    assert_equal markdown, parse(html)
+  end
+  def test_code
+    html = '<code>Unmarkdown.parse(html)</code>'
+    markdown = '`Unmarkdown.parse(html)`'
+    assert_equal markdown, parse(html)
+  end
+  def test_image
+    html = '<img src="http://soffes-assets.s3.amazonaws.com/images/Sam-Soffes.jpg">'
+    markdown = '![](http://soffes-assets.s3.amazonaws.com/images/Sam-Soffes.jpg)'
+    assert_equal markdown, parse(html)
+    html = '<img src="http://soffes-assets.s3.amazonaws.com/images/Sam-Soffes.jpg" alt="Sam Soffes">'
+    markdown = '![Sam Soffes](http://soffes-assets.s3.amazonaws.com/images/Sam-Soffes.jpg)'
+    assert_equal markdown, parse(html)
+    html = '<img src="http://soffes-assets.s3.amazonaws.com/images/Sam-Soffes.jpg" title="That guy">'
+    markdown = '![](http://soffes-assets.s3.amazonaws.com/images/Sam-Soffes.jpg "That guy")'
+    assert_equal markdown, parse(html)
+  end
+  def test_script
+    html = %Q{<blockquote class="twitter-tweet"><p><a href="https://twitter.com/soffes">@soffes</a> If people think Apple is going to redo their promo videos and 3D animation intros for iOS 7 they&#39;re crazy. The design is ~final.</p>&mdash; Mike Rundle (@flyosity) <a href="https://twitter.com/flyosity/statuses/348358938296733696">June 22, 2013</a></blockquote>\n<script async src="//platform.twitter.com/widgets.js" charset="utf-8"></script>}
+    markdown = %Q{> [@soffes](https://twitter.com/soffes) If people think Apple is going to redo their promo videos and 3D animation intros for iOS 7 they're crazy. The design is ~final.\n> \n> — Mike Rundle (@flyosity) [June 22, 2013](https://twitter.com/flyosity/statuses/348358938296733696)}
+    assert_equal markdown, parse(html)
+    html = %Q{<blockquote class="twitter-tweet"><p><a href="https://twitter.com/soffes">@soffes</a> If people think Apple is going to redo their promo videos and 3D animation intros for iOS 7 they&#39;re crazy. The design is ~final.</p>&mdash; Mike Rundle (@flyosity) <a href="https://twitter.com/flyosity/statuses/348358938296733696">June 22, 2013</a></blockquote>\n<script async src="//platform.twitter.com/widgets.js" charset="utf-8"></script>}
+    markdown = %Q{> [@soffes](https://twitter.com/soffes) If people think Apple is going to redo their promo videos and 3D animation intros for iOS 7 they're crazy. The design is ~final.\n> \n> — Mike Rundle (@flyosity) [June 22, 2013](https://twitter.com/flyosity/statuses/348358938296733696)\n\n<script async src="//platform.twitter.com/widgets.js" charset="utf-8"></script>}
+    assert_equal markdown, parse(html, allow_scripts: true)
+  end
+  def test_autolink
+    html = 'Head to http://soff.es and email sam@soff.es'
+    assert_equal html, parse(html)
+    markdown = 'Head to <http://soff.es> and email <sam@soff.es>'
+    assert_equal markdown, parse(html, autolink: true)
+  end
+end

data/test/test_helper.rb ADDED Viewed

@@ -0,0 +1,19 @@
+require 'rubygems'
+require 'bundler'
+Bundler.require :test
+if ENV['COVERAGE'] # coverage is opt-in via the COVERAGE environment variable
+  require 'simplecov'
+  SimpleCov.start
+end
+require 'minitest/autorun'
+require 'unmarkdown'
+# Support files: require every Ruby file in the support/ directory next to this helper
+Dir["#{File.expand_path(File.dirname(__FILE__))}/support/*.rb"].each do |file|
+  require file
+end
+class Unmarkdown::Test < MiniTest::Test
+end

data/test/unmarkdown_test.rb ADDED Viewed

@@ -0,0 +1,9 @@
+require 'test_helper'
+module Unmarkdown
+  class UnmarkdownTest < Test
+    def test_that_it_parses
+      refute_nil Unmarkdown.parse('foo') # smoke test: plain text in, non-nil result out
+    end
+  end
+end

data/unmarkdown.gemspec ADDED Viewed

@@ -0,0 +1,26 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'unmarkdown/version'
+Gem::Specification.new do |spec|
+  spec.name          = 'unmarkdown'
+  spec.version       = Unmarkdown::VERSION
+  spec.authors       = ['Sam Soffes']
+  spec.email         = ['sam@soff.es']
+  spec.summary       = 'Convert HTML to Markdown'
+  spec.homepage      = 'https://github.com/soffes/unmarkdown'
+  spec.license       = 'MIT'
+  spec.files         = `git ls-files`.split($/) # output of `git ls-files`, split on the record separator $/
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } # base names of files under bin/
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ['lib']
+  spec.required_ruby_version = '>= 1.9.2'
+  spec.add_development_dependency 'bundler'
+  # HTML parsing (the only runtime dependency declared here)
+  spec.add_dependency 'nokogiri'
+end

metadata ADDED Viewed

@@ -0,0 +1,87 @@
+--- !ruby/object:Gem::Specification
+name: unmarkdown
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Sam Soffes
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-12-15 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description:
+email:
+- sam@soff.es
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- LICENSE
+- Rakefile
+- Readme.markdown
+- lib/unmarkdown.rb
+- lib/unmarkdown/parser.rb
+- lib/unmarkdown/version.rb
+- test/parser_test.rb
+- test/test_helper.rb
+- test/unmarkdown_test.rb
+- unmarkdown.gemspec
+homepage: https://github.com/soffes/unmarkdown
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: 1.9.2
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.0.3
+signing_key:
+specification_version: 4
+summary: Convert HTML to Markdown
+test_files:
+- test/parser_test.rb
+- test/test_helper.rb
+- test/unmarkdown_test.rb