orph 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS rather than plain HTTP so that downloads are
# encrypted in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in orph.gemspec
gemspec
@@ -0,0 +1,25 @@
1
+ Orph
2
+ ====
3
+
4
+ Orphans (commonly referred to as 'widows') are single-word lines at the end of paragraphs and are generally considered bad form by type nerds. This library prevents them by replacing the last space in each content element with a non-breaking space, so the final two words always wrap together.
5
+
6
+ ## Usage
7
+
8
+ >> orph = Orph.new
9
+ => #<Orph:0x000001008e7578 @content_tags=["h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "blockquote", "dt", "dd"]>
10
+
11
+ >> orph.fix("<p>Here's some content.</p>")
12
+ => "<p>Here's some&#160;content.</p>"
13
+
14
+ >> orph.fix "<p><span>some content</span><span>more content</span></p>"
15
+ => "<p><span>some content</span><span>more&#160;content</span></p>"
16
+
17
+ >> orph.content_tags << "span"
18
+ => ["h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "blockquote", "dt", "dd", "span"]
19
+
20
+ >> orph.fix "<p><span>some content</span><span>more content</span></p>"
21
+ => "<p><span>some&#160;content</span><span>more&#160;content</span></p>"
22
+
23
+ * * *
24
+
25
+ (c) 2011 David Eisinger
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
# Defines the `test` task: adds lib/ and test/ to the load path and runs
# every Ruby file directly under test/ with verbose output.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/*.rb'
  t.verbose = true
end

# Running bare `rake` runs the test task.
task :default => :test
@@ -0,0 +1,56 @@
1
+ require "orph/version"
2
+ require "nokogiri"
3
+
4
# Prevents typographic orphans (single-word last lines) in HTML by replacing
# the last space of each content element's text with a non-breaking space.
#
#   Orph.new.fix("<p>these are words</p>")
#   # => "<p>these are&#160;words</p>"
class Orph
  # Tag names treated as content containers when no list is supplied.
  DEFAULT_CONTENT_TAGS = %w[h1 h2 h3 h4 h5 h6 p li blockquote dt dd].freeze

  # Numeric character reference substituted for the last space.
  NBSP_ENTITY = "&#160;".freeze

  # Array of tag names whose contents are fixed. It can be appended to or
  # reassigned after construction.
  attr_accessor :content_tags

  # content_tags - Array of tag-name Strings to treat as content containers.
  #                Defaults to a fresh, mutable copy of DEFAULT_CONTENT_TAGS
  #                so that changes to one instance never affect another.
  def initialize(content_tags = DEFAULT_CONTENT_TAGS.dup)
    self.content_tags = content_tags
  end

  # Parses +html+ as an HTML fragment, fixes the orphan in every content
  # element found, and returns the fragment serialized back to a String.
  def fix(html)
    # NOTE(review): the "ASCII" encoding is presumably what makes the
    # non-breaking space serialize as "&#160;" - confirm against Nokogiri.
    doc = Nokogiri::HTML::DocumentFragment.parse(html, "ASCII")
    parse_nodes(doc.children)
    doc.to_html
  end

  private

  # Walks +nodes+ depth-first. A content node has its orphan removed and is
  # not descended into by this method; any other node has its children
  # walked in turn.
  def parse_nodes(nodes)
    nodes.each do |node|
      if content_node?(node)
        remove_widow(node.children)
      else
        parse_nodes(node.children)
      end
    end
  end

  # True when +node+ is a Nokogiri text node that is not blank.
  def text_node?(node)
    node.is_a?(Nokogiri::XML::Text) && !node.blank?
  end

  # True when the name of +node+ is listed in content_tags.
  def content_tag?(node)
    content_tags.include?(node.name)
  end

  # True when +node+ is a content tag with at least one child that is neither
  # blank nor itself a content tag. A content tag that merely wraps other
  # content tags (e.g. an <li> holding only <p>s) is therefore not a content
  # node, and the wrapped tags are handled individually instead.
  def content_node?(node)
    content_tag?(node) &&
      node.children.any? { |child| !child.blank? && !content_tag?(child) }
  end

  # Returns a copy of +html+ with its last space replaced by NBSP_ENTITY,
  # or +html+ itself when it contains no space.
  def bind_last_space(html)
    index = html.rindex(" ")
    return html unless index

    html[0...index] + NBSP_ENTITY + html[(index + 1)..-1]
  end

  # Searches +nodes+ from last to first for the space to replace.
  #
  # The first non-blank text node containing a space has its last space
  # replaced. Non-text nodes are searched recursively, and text nodes
  # without a space are skipped.
  #
  # Returns true as soon as a replacement has been made, false if none was.
  def remove_widow(nodes)
    nodes.reverse_each do |node|
      if text_node?(node) && node.to_s.include?(" ")
        node.replace(bind_last_space(node.to_html))
        return true
      elsif !node.is_a?(Nokogiri::XML::Text)
        return true if remove_widow(node.children)
      end
    end

    false
  end
end
56
+
@@ -0,0 +1,3 @@
1
class Orph
  # Release number of the gem; orph.gemspec reads it as the gem version.
  VERSION = '0.1.0'
end
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "orph/version"
4
+
5
# Gem specification for orph. The version comes from Orph::VERSION and the
# file lists are read from the output of `git ls-files`.
Gem::Specification.new do |s|
  s.name        = "orph"
  s.version     = Orph::VERSION
  s.authors     = ["David Eisinger"]
  s.email       = ["david.eisinger@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{Orph is a library for removing typographic orphans}
  # Built from adjacent single-line literals: a multi-line %q{} literal
  # embeds its line breaks and source indentation in the published
  # description.
  s.description = "Orphans (commonly referred to as 'widows') are " \
                  "single-word lines at the end of paragraphs. This " \
                  "library removes them with non-breaking spaces."

  s.rubyforge_project = "orph"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency "nokogiri"

  s.add_development_dependency "riot"
end
@@ -0,0 +1,89 @@
1
+ require "riot"
2
+ require "orph"
3
+
4
# Riot specs for Orph#fix. Each nested context feeds one HTML snippet through
# a fresh Orph instance and asserts on the returned markup.
context "Orph, removing orphans" do
  setup { Orph.new }

  context "with a single HTML tag" do
    setup { topic.fix("<p>these are words</p>") }
    asserts_topic.equals("<p>these are&#160;words</p>")
  end

  context "with multiple HTML tags" do
    setup { topic.fix("<p>one word</p><p>two words</p>") }
    asserts_topic.equals("<p>one&#160;word</p><p>two&#160;words</p>")
  end

  context "with a non-block-level nested tag" do
    setup { topic.fix "<p>This is <strong>important</strong>.</p>" }
    asserts_topic.equals("<p>This is&#160;<strong>important</strong>.</p>")
  end

  context "with nested HTML tags" do
    setup { topic.fix("<ul><li>one word</li><li>two words</li></ul>").gsub(/\n/, '') }
    asserts_topic.equals("<ul><li>one&#160;word</li><li>two&#160;words</li></ul>")
  end

  context "with a space inside an HTML tag" do
    setup { topic.fix('<p><a href="http://google.com/">Google</a></p>') }
    asserts_topic.equals('<p><a href="http://google.com/">Google</a></p>')
  end

  context "with text and a short link" do
    setup { topic.fix('<p>This is a <a href="#">link</a></p>') }
    asserts_topic.equals('<p>This is a&#160;<a href="#">link</a></p>')
  end

  context "with text, a long link, and ending text" do
    setup { topic.fix('<p>This is a <a href="#">long link</a>.</p>') }
    asserts_topic.equals('<p>This is a <a href="#">long&#160;link</a>.</p>')
  end

  context "with paragraph consisting of two links" do
    setup { topic.fix('<p><a href="#">link one</a><a href="#">link two</a></p>') }
    asserts_topic.equals('<p><a href="#">link one</a><a href="#">link&#160;two</a></p>')
  end

  context "with a div with two paragraphs and free text" do
    setup do
      html = '<div>some text<p>paragraph 1</p><p>paragraph 2</p></div>'
      # Strip only newlines and runs of two or more spaces (indentation).
      # Single spaces must survive, otherwise the "some text" assertion
      # below can never match.
      topic.fix(html).gsub(/( {2,}|\n)/, "")
    end

    asserts_topic.includes("some text")
    asserts_topic.includes("<p>paragraph&#160;1</p>")
    asserts_topic.includes("<p>paragraph&#160;2</p>")
  end

  context "with a UL containing paragraphs" do
    setup do
      html = <<-HTML
        <div>
          <ul>
            <li>some text</li>
            <li>
              <p>par. 1</p>
              <p>par. 2</p>
            </li>
          </ul>
        </div>
      HTML

      # Remove the heredoc's newlines and indentation so the assertions can
      # compare against compact markup; single spaces inside text are kept.
      topic.fix(html).gsub(/( {2,}|\n)/, "")
    end

    asserts_topic.includes("<li>some&#160;text</li>")
    asserts_topic.includes("<li><p>par.&#160;1</p><p>par.&#160;2</p></li>")
  end

  context "with span as a content container" do
    setup do
      topic.content_tags << "span"
      topic.fix("<p><span>span 1</span><span>span 2</span></p>")
    end

    asserts_topic.includes("<span>span&#160;1</span>")
    asserts_topic.includes("<span>span&#160;2</span>")
  end
end
89
+
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: orph
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - David Eisinger
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-10-03 00:00:00.000000000 -04:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ requirement: &2153291960 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *2153291960
26
+ - !ruby/object:Gem::Dependency
27
+ name: riot
28
+ requirement: &2153291540 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: *2153291540
37
+ description: ! "Orphans (commonly referred to as 'widows') are\n single-word
38
+ lines at the end of paragraphs. This\n library removes them
39
+ with non-breaking spaces."
40
+ email:
41
+ - david.eisinger@gmail.com
42
+ executables: []
43
+ extensions: []
44
+ extra_rdoc_files: []
45
+ files:
46
+ - .gitignore
47
+ - Gemfile
48
+ - README.md
49
+ - Rakefile
50
+ - lib/orph.rb
51
+ - lib/orph/version.rb
52
+ - orph.gemspec
53
+ - test/orph_test.rb
54
+ has_rdoc: true
55
+ homepage: ''
56
+ licenses: []
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ! '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubyforge_project: orph
75
+ rubygems_version: 1.6.2
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: Orph is a library for removing typographic orphans
79
+ test_files:
80
+ - test/orph_test.rb