orph 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Gem source for Bundler. HTTPS is used so gem downloads are encrypted and
# cannot be tampered with in transit (the plain-HTTP endpoint is insecure).
source "https://rubygems.org"

# Specify your gem's dependencies in orph.gemspec
gemspec
@@ -0,0 +1,25 @@
1
+ Orph
2
+ ====
3
+
4
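+ Orphans (commonly referred to as 'widows') are single-word lines at the end of paragraphs and are generally considered bad form by type nerds. This library prevents them by replacing the last space in each block of content with a non-breaking space (`&#160;`), so the final two words always wrap together.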
5
+
6
+ ## Usage
7
+
8
+ >> orph = Orph.new
9
+ => #<Orph:0x000001008e7578 @content_tags=["h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "blockquote", "dt", "dd"]>
10
+
11
+ >> orph.fix("<p>Here's some content.</p>")
12
+ => "<p>Here's some&#160;content.</p>"
13
+
14
+ >> orph.fix "<p><span>some content</span><span>more content</span></p>"
15
+ => "<p><span>some content</span><span>more&#160;content</span></p>"
16
+
17
+ >> orph.content_tags << "span"
18
+ => ["h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "blockquote", "dt", "dd", "span"]
19
+
20
+ >> orph.fix "<p><span>some content</span><span>more content</span></p>"
21
+ => "<p><span>some&#160;content</span><span>more&#160;content</span></p>"
22
+
23
+ * * *
24
+
25
+ (c) 2011 David Eisinger
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
# Running plain `rake` runs the test suite.
task :default => [:test]

# Defines the `test` task: adds lib/ and test/ to the load path and runs
# every Ruby file directly under test/ with verbose output.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/*.rb'
  t.verbose = true
end
@@ -0,0 +1,56 @@
1
+ require "orph/version"
2
+ require "nokogiri"
3
+
4
# Prevents typographic orphans in HTML by replacing the last ordinary space
# inside each content element with a non-breaking space entity.
#
#   Orph.new.fix("<p>these are words</p>")
#   # => "<p>these are&#160;words</p>"
class Orph
  # Tag names treated as content containers by default. Frozen so the shared
  # list cannot be mutated; each instance works on its own copy.
  DEFAULT_CONTENT_TAGS = %w[h1 h2 h3 h4 h5 h6 p li blockquote dt dd].freeze

  # Entity substituted for the last space of a content element.
  NBSP_ENTITY = "&#160;".freeze

  # Tag names whose text gets orphan protection. Callers may mutate or
  # replace this list (e.g. `orph.content_tags << "span"`).
  attr_accessor :content_tags

  def initialize
    # dup so appending to one instance's list never affects another instance
    self.content_tags = DEFAULT_CONTENT_TAGS.dup
  end

  # Parses +html+ as a fragment, protects the last space of every content
  # node, and returns the re-serialized HTML.
  #
  # @param html [String] HTML fragment to process
  # @return [String] the serialized fragment after processing
  def fix(html)
    # NOTE(review): the "ASCII" encoding argument presumably makes the
    # serializer emit non-ASCII characters as numeric entities - confirm
    # against the Nokogiri documentation before changing it.
    doc = Nokogiri::HTML::DocumentFragment.parse(html, "ASCII")
    parse_nodes(doc.children)
    doc.to_html
  end

  private

  # Walks +nodes+ depth-first. A content node is processed as a unit; any
  # other node is descended into in search of content nodes.
  def parse_nodes(nodes)
    nodes.each do |node|
      if content_node?(node)
        remove_widow(node.children)
      else
        parse_nodes(node.children)
      end
    end
  end

  # True for a text node that contains something other than whitespace.
  def text_node?(node)
    node.is_a?(Nokogiri::XML::Text) && !node.blank?
  end

  # True when the node's tag name is listed in #content_tags.
  def content_tag?(node)
    content_tags.include?(node.name)
  end

  # True when +node+ is a content tag that directly holds something other
  # than blank nodes and nested content tags. A content tag containing only
  # other content tags (e.g. an <li> made of <p>s) is not treated as content
  # itself, so its children are handled individually instead.
  def content_node?(node)
    content_tag?(node) &&
      node.children.any? { |child| !child.blank? && !content_tag?(child) }
  end

  # Returns +html+ with its last space character replaced by NBSP_ENTITY,
  # or +html+ itself when it contains no space.
  def nbsp_last_space(html)
    index = html.rindex(" ")
    return html unless index

    html[0...index] + NBSP_ENTITY + html[(index + 1)..-1]
  end

  # Scans +nodes+ from last to first and replaces the last space of the
  # first non-blank text node found that contains a space, descending into
  # non-text nodes along the way.
  #
  # NOTE(review): the replaced space is simply the last one in that text
  # node, so a text node ending in a space has that trailing space replaced.
  # This is what binds "This is " to a following inline element, but it also
  # applies when nothing follows the text.
  #
  # @return [Boolean] true once a space has been replaced, false otherwise
  def remove_widow(nodes)
    nodes.reverse_each do |node|
      if text_node?(node) && node.to_s.include?(" ")
        node.replace(nbsp_last_space(node.to_html))
        return true
      elsif !node.is_a?(Nokogiri::XML::Text)
        return true if remove_widow(node.children)
      end
    end

    false
  end
end
56
+
@@ -0,0 +1,3 @@
1
class Orph
  # Release version of the gem; orph.gemspec reads it for the spec's version.
  # Frozen so the constant's string cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "orph/version"
4
+
5
# Gem specification for orph: identity, packaged files (taken from git), and
# runtime/development dependencies.
Gem::Specification.new do |s|
  s.name        = "orph"
  s.version     = Orph::VERSION
  s.authors     = ["David Eisinger"]
  s.email       = ["david.eisinger@gmail.com"]
  # NOTE(review): homepage is blank - fill in the project URL when known.
  s.homepage    = ""
  s.summary     = %q{Orph is a library for removing typographic orphans}
  # Assembled from adjacent literals so the packaged metadata holds clean
  # sentences instead of this file's line breaks and indentation.
  s.description = "Orphans (commonly referred to as 'widows') are " \
                  "single-word lines at the end of paragraphs. This " \
                  "library removes them with non-breaking spaces."

  s.rubyforge_project = "orph"

  # File lists come from git, so building the gem requires a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency "nokogiri"

  s.add_development_dependency "riot"
end
@@ -0,0 +1,89 @@
1
+ require "riot"
2
+ require "orph"
3
+
4
# Riot suite for Orph#fix. Each nested context feeds one HTML snippet to a
# fresh Orph instance and checks where the non-breaking space entity lands.
context "Orph, removing orphans" do
  setup { Orph.new }

  context "with a single HTML tag" do
    setup { topic.fix("<p>these are words</p>") }
    asserts_topic.equals("<p>these are&#160;words</p>")
  end

  context "with multiple HTML tags" do
    setup { topic.fix("<p>one word</p><p>two words</p>") }
    asserts_topic.equals("<p>one&#160;word</p><p>two&#160;words</p>")
  end

  context "with a non-block-level nested tag" do
    setup { topic.fix "<p>This is <strong>important</strong>.</p>" }
    asserts_topic.equals("<p>This is&#160;<strong>important</strong>.</p>")
  end

  context "with nested HTML tags" do
    # Newlines in the serialized output are stripped before comparing.
    setup { topic.fix("<ul><li>one word</li><li>two words</li></ul>").gsub(/\n/, '') }
    asserts_topic.equals("<ul><li>one&#160;word</li><li>two&#160;words</li></ul>")
  end

  context "with a space inside an HTML tag" do
    # The only space is inside the tag markup, so nothing may change.
    setup { topic.fix('<p><a href="http://google.com/">Google</a></p>') }
    asserts_topic.equals('<p><a href="http://google.com/">Google</a></p>')
  end

  context "with text and a short link" do
    setup { topic.fix('<p>This is a <a href="#">link</a></p>') }
    asserts_topic.equals('<p>This is a&#160;<a href="#">link</a></p>')
  end

  context "with text, a long link, and ending text" do
    setup { topic.fix('<p>This is a <a href="#">long link</a>.</p>') }
    asserts_topic.equals('<p>This is a <a href="#">long&#160;link</a>.</p>')
  end

  context "with paragraph consisting of two links" do
    setup { topic.fix('<p><a href="#">link one</a><a href="#">link two</a></p>') }
    asserts_topic.equals('<p><a href="#">link one</a><a href="#">link&#160;two</a></p>')
  end

  context "with a div with two paragraphs and free text" do
    setup do
      html = '<div>some text<p>paragraph 1</p><p>paragraph 2</p></div>'
      topic.fix(html).gsub(/( |\n)/, "")
    end

    # Every plain space is stripped above, so untouched free text shows up
    # as "sometext"; had it been altered it would read "some&#160;text".
    asserts_topic.includes("sometext")
    asserts_topic.includes("<p>paragraph&#160;1</p>")
    asserts_topic.includes("<p>paragraph&#160;2</p>")
  end

  context "with a UL containing paragraphs" do
    setup do
      html = <<-HTML
        <div>
          <ul>
            <li>some text</li>
            <li>
              <p>par. 1</p>
              <p>par. 2</p>
            </li>
          </ul>
        </div>
      HTML

      # Drop indentation and newlines so the assertions can match tag runs.
      topic.fix(html).gsub(/( |\n)/, "")
    end

    asserts_topic.includes("<li>some&#160;text</li>")
    asserts_topic.includes("<li><p>par.&#160;1</p><p>par.&#160;2</p></li>")
  end

  context "with span as a content container" do
    setup do
      topic.content_tags << "span"
      topic.fix("<p><span>span 1</span><span>span 2</span></p>")
    end

    asserts_topic.includes("<span>span&#160;1</span>")
    asserts_topic.includes("<span>span&#160;2</span>")
  end
end
89
+
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: orph
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - David Eisinger
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-10-03 00:00:00.000000000 -04:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ requirement: &2153291960 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *2153291960
26
+ - !ruby/object:Gem::Dependency
27
+ name: riot
28
+ requirement: &2153291540 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: *2153291540
37
+ description: ! "Orphans (commonly referred to as 'widows') are\n single-word
38
+ lines at the end of paragraphs. This\n library removes them
39
+ with non-breaking spaces."
40
+ email:
41
+ - david.eisinger@gmail.com
42
+ executables: []
43
+ extensions: []
44
+ extra_rdoc_files: []
45
+ files:
46
+ - .gitignore
47
+ - Gemfile
48
+ - README.md
49
+ - Rakefile
50
+ - lib/orph.rb
51
+ - lib/orph/version.rb
52
+ - orph.gemspec
53
+ - test/orph_test.rb
54
+ has_rdoc: true
55
+ homepage: ''
56
+ licenses: []
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ! '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubyforge_project: orph
75
+ rubygems_version: 1.6.2
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: Orph is a library for removing typographic orphans
79
+ test_files:
80
+ - test/orph_test.rb