RubyGems - orph - Versions diffs - 0.1.0 - Mend

orph 0.1.0

Files changed (9) hide show

data/.gitignore ADDED

@@ -0,0 +1,4 @@
+*.gem
+.bundle
+Gemfile.lock
+pkg/*

data/Gemfile ADDED

@@ -0,0 +1,4 @@
+# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
+source "https://rubygems.org"
+# Specify your gem's dependencies in orph.gemspec
+gemspec

data/README.md ADDED

@@ -0,0 +1,25 @@
+Orph
+====
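+Orphans (commonly referred to as 'widows') are single-word lines at the end of paragraphs and are generally considered bad form by type nerds. This library prevents them by replacing the last space in each content element (paragraph, heading, list item, and so on) with a non-breaking space (`&#160;`).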
+## Usage
+    >> orph = Orph.new
+    => #<Orph:0x000001008e7578 @content_tags=["h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "blockquote", "dt", "dd"]>
+    >> orph.fix("<p>Here's some content.</p>")
+    => "<p>Here's some&#160;content.</p>"
+    >> orph.fix "<p><span>some content</span><span>more content</span></p>"
+    => "<p><span>some content</span><span>more&#160;content</span></p>"
+    >> orph.content_tags << "span"
+    => ["h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "blockquote", "dt", "dd", "span"]
+    >> orph.fix "<p><span>some content</span><span>more content</span></p>"
+    => "<p><span>some&#160;content</span><span>more&#160;content</span></p>"
+* * *
+(c) 2011 David Eisinger

data/Rakefile ADDED

@@ -0,0 +1,10 @@
+require 'bundler/gem_tasks'
+require 'rake/testtask'
+task :default => :test
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.pattern = 'test/*.rb'
+  test.verbose = true
+end

data/lib/orph.rb ADDED

@@ -0,0 +1,56 @@
+require "orph/version"
+require "nokogiri"
+class Orph
+  attr_accessor :content_tags
+  def initialize
+    self.content_tags = %w(h1 h2 h3 h4 h5 h6 p li blockquote dt dd)
+  end
+  def fix(html)
+    doc = Nokogiri::HTML::DocumentFragment.parse(html, "ASCII")
+    parse_nodes(doc.children)
+    doc.to_html
+  end
+  private
+  def parse_nodes(nodes)
+    nodes.each do |node|
+      if content_node?(node)
+        remove_widow(node.children)
+      else
+        parse_nodes(node.children)
+      end
+    end
+  end
+  def text_node?(node)
+    node.is_a?(Nokogiri::XML::Text) && !node.blank?
+  end
+  def content_tag?(node)
+    content_tags.include?(node.name)
+  end
+  def content_node?(node)
+    content_tag?(node) && !node.children.all? { |child| child.blank? || content_tag?(child) }
+  end
+  def remove_widow(nodes)
+    smash = lambda { |html| html.reverse.sub(" ", "&#160;".reverse).reverse }
+    nodes.reverse.each do |node|
+      if text_node?(node) && node.to_s.include?(" ")
+        node.replace smash[node.to_html]
+        return true
+      elsif !node.is_a?(Nokogiri::XML::Text)
+        return true if remove_widow(node.children)
+      end
+    end
+    false
+  end
+end

data/lib/orph/version.rb ADDED

@@ -0,0 +1,3 @@
+class Orph # reopened here only to hold the gem's version string
+  VERSION = "0.1.0" # version of the orph gem
+end

data/orph.gemspec ADDED

@@ -0,0 +1,26 @@
+# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "orph/version"
+Gem::Specification.new do |s|
+  s.name        = "orph"
+  s.version     = Orph::VERSION
+  s.authors     = ["David Eisinger"]
+  s.email       = ["david.eisinger@gmail.com"]
+  s.homepage    = ""
+  s.summary     = %q{Orph is a library for removing typographic orphans}
+  s.description = %q{Orphans (commonly referred to as 'widows') are
+                     single-word lines at the end of paragraphs. This
+                     library removes them with non-breaking spaces.}
+  s.rubyforge_project = "orph"
+  s.files         = `git ls-files`.split("\n")
+  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
+  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+  s.require_paths = ["lib"]
+  s.add_dependency "nokogiri"
+  s.add_development_dependency "riot"
+end

data/test/orph_test.rb ADDED

@@ -0,0 +1,89 @@
+require "riot"
+require "orph"
+context "Orph, removing orphans" do
+  setup { Orph.new }
+  context "with a single HTML tag" do
+    setup { topic.fix("<p>these are words</p>") }
+    asserts_topic.equals("<p>these are&#160;words</p>")
+  end
+  context "with multiple HTML tags" do
+    setup { topic.fix("<p>one word</p><p>two words</p>") }
+    asserts_topic.equals("<p>one&#160;word</p><p>two&#160;words</p>")
+  end
+  context "with a non-block-level nested tag" do
+    setup { topic.fix "<p>This is <strong>important</strong>.</p>" }
+    asserts_topic.equals("<p>This is&#160;<strong>important</strong>.</p>")
+  end
+  context "with nested HTML tags" do
+    setup { topic.fix("<ul><li>one word</li><li>two words</li></ul>").gsub(/\n/, '') }
+    asserts_topic.equals("<ul><li>one&#160;word</li><li>two&#160;words</li></ul>")
+  end
+  context "with a space inside an HTML tag" do
+    setup { topic.fix('<p><a href="http://google.com/">Google</a></p>') }
+    asserts_topic.equals('<p><a href="http://google.com/">Google</a></p>')
+  end
+  context "with text and a short link" do
+    setup { topic.fix('<p>This is a <a href="#">link</a></p>') }
+    asserts_topic.equals('<p>This is a&#160;<a href="#">link</a></p>')
+  end
+  context "with text, a long link, and ending text" do
+    setup { topic.fix('<p>This is a <a href="#">long link</a>.</p>') }
+    asserts_topic.equals('<p>This is a <a href="#">long&#160;link</a>.</p>')
+  end
+  context "with paragraph consisting of two links" do
+    setup { topic.fix('<p><a href="#">link one</a><a href="#">link two</a></p>') }
+    asserts_topic.equals('<p><a href="#">link one</a><a href="#">link&#160;two</a></p>')
+  end
+  context "with a div with two paragraphs and free text" do
+    setup do
+      html = '<div>some text<p>paragraph 1</p><p>paragraph 2</p></div>'
+      topic.fix(html).gsub(/(  |\n)/, "")
+    end
+    asserts_topic.includes("some text")
+    asserts_topic.includes("<p>paragraph&#160;1</p>")
+    asserts_topic.includes("<p>paragraph&#160;2</p>")
+  end
+  context "with a UL containing paragraphs" do
+    setup do
+      html = <<-HTML
+        <div>
+          <ul>
+            <li>some text</li>
+            <li>
+              <p>par. 1</p>
+              <p>par. 2</p>
+            </li>
+          </ul>
+        </div>
+      HTML
+      topic.fix(html).gsub(/(  |\n)/, "")
+    end
+    asserts_topic.includes("<li>some&#160;text</li>")
+    asserts_topic.includes("<li><p>par.&#160;1</p><p>par.&#160;2</p></li>")
+  end
+  context "with span as a content container" do
+    setup do
+      topic.content_tags << "span"
+      topic.fix("<p><span>span 1</span><span>span 2</span></p>")
+    end
+    asserts_topic.includes("<span>span&#160;1</span>")
+    asserts_topic.includes("<span>span&#160;2</span>")
+  end
+end

metadata ADDED

@@ -0,0 +1,80 @@
+--- !ruby/object:Gem::Specification
+name: orph
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+  prerelease:
+platform: ruby
+authors:
+- David Eisinger
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-10-03 00:00:00.000000000 -04:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: &2153291960 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: *2153291960
+- !ruby/object:Gem::Dependency
+  name: riot
+  requirement: &2153291540 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: *2153291540
+description: ! "Orphans (commonly referred to as 'widows') are\n                     single-word
+  lines at the end of paragraphs. This\n                     library removes them
+  with non-breaking spaces."
+email:
+- david.eisinger@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- README.md
+- Rakefile
+- lib/orph.rb
+- lib/orph/version.rb
+- orph.gemspec
+- test/orph_test.rb
+has_rdoc: true
+homepage: ''
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project: orph
+rubygems_version: 1.6.2
+signing_key:
+specification_version: 3
+summary: Orph is a library for removing typographic orphans
+test_files:
+- test/orph_test.rb