orph 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/README.md +25 -0
- data/Rakefile +10 -0
- data/lib/orph.rb +56 -0
- data/lib/orph/version.rb +3 -0
- data/orph.gemspec +26 -0
- data/test/orph_test.rb +89 -0
- metadata +80 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
Orph
|
2
|
+
====
|
3
|
+
|
4
|
+
Orphans (commonly referred to as 'widows') are single-word lines at the end of paragraphs and are generally considered bad form by type nerds. This library removes them with non-breaking spaces.
|
5
|
+
|
6
|
+
## Usage
|
7
|
+
|
8
|
+
>> orph = Orph.new
|
9
|
+
=> #<Orph:0x000001008e7578 @content_tags=["h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "blockquote", "dt", "dd"]>
|
10
|
+
|
11
|
+
>> orph.fix("<p>Here's some content.</p>")
|
12
|
+
=> "<p>Here's some&nbsp;content.</p>"
|
13
|
+
|
14
|
+
>> orph.fix "<p><span>some content</span><span>more content</span></p>"
|
15
|
+
=> "<p><span>some content</span><span>more&nbsp;content</span></p>"
|
16
|
+
|
17
|
+
>> orph.content_tags << "span"
|
18
|
+
=> ["h1", "h2", "h3", "h4", "h5", "h6", "p", "li", "blockquote", "dt", "dd", "span"]
|
19
|
+
|
20
|
+
>> orph.fix "<p><span>some content</span><span>more content</span></p>"
|
21
|
+
=> "<p><span>some&nbsp;content</span><span>more&nbsp;content</span></p>"
|
22
|
+
|
23
|
+
* * *
|
24
|
+
|
25
|
+
(c) 2011 David Eisinger
|
data/Rakefile
ADDED
data/lib/orph.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require "orph/version"
|
2
|
+
require "nokogiri"
|
3
|
+
|
4
|
+
# Removes typographic orphans (a single word left alone on the last line of a
# block) from HTML by replacing the last ordinary space of each content block
# with a non-breaking space entity, so the final two words wrap together.
class Orph
  # Entity written in place of the last ordinary space of a content block.
  NBSP = "&nbsp;"

  # Names of the tags whose contents are treated as one block of text.
  # The list belongs to the instance and may be extended by callers,
  # e.g. <tt>orph.content_tags << "span"</tt>.
  attr_accessor :content_tags

  def initialize
    self.content_tags = %w[h1 h2 h3 h4 h5 h6 p li blockquote dt dd]
  end

  # Parses +html+ as a fragment, binds the last two words of every content
  # block and returns the re-serialized markup.
  #
  # @param html [String] HTML fragment to process
  # @return [String] the processed HTML
  def fix(html)
    # NOTE(review): the "ASCII" encoding presumably makes the serializer emit
    # the non-breaking space as an entity instead of a raw character — confirm
    # against the Nokogiri documentation.
    doc = Nokogiri::HTML::DocumentFragment.parse(html, "ASCII")
    parse_nodes(doc.children)
    doc.to_html
  end

  private

  # Walks +nodes+ depth-first. A content node is fixed in place; any other
  # node is searched for content nodes further down.
  def parse_nodes(nodes)
    nodes.each do |node|
      if content_node?(node)
        remove_widow(node.children)
      else
        parse_nodes(node.children)
      end
    end
  end

  # True for a text node that holds something other than whitespace.
  def text_node?(node)
    node.is_a?(Nokogiri::XML::Text) && !node.blank?
  end

  # True when the node's tag name is listed in +content_tags+.
  def content_tag?(node)
    content_tags.include?(node.name)
  end

  # True when +node+ is a listed tag that directly holds content of its own:
  # at least one child is neither blank nor itself a listed tag. A listed tag
  # that merely wraps other listed tags (an <li> holding only <p>s) is not a
  # content node, so its children get processed individually instead.
  def content_node?(node)
    content_tag?(node) &&
      node.children.any? { |child| !child.blank? && !content_tag?(child) }
  end

  # Scans +nodes+ from last to first and replaces the last space of the first
  # text node that contains one, descending into non-text nodes on the way.
  # Text nodes without a space (blank or single-word) are skipped.
  #
  # @return [Boolean] true once a space has been replaced, false if none was found
  def remove_widow(nodes)
    nodes.reverse_each do |node|
      if text_node?(node) && node.to_s.include?(" ")
        node.replace(bind_last_space(node.to_html))
        return true
      elsif !node.is_a?(Nokogiri::XML::Text)
        return true if remove_widow(node.children)
      end
    end

    false
  end

  # Returns +html+ with its last space replaced by NBSP; a string without any
  # space is returned unchanged.
  def bind_last_space(html)
    head, space, tail = html.rpartition(" ")
    space.empty? ? html : "#{head}#{NBSP}#{tail}"
  end
end
|
56
|
+
|
data/lib/orph/version.rb
ADDED
data/orph.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "orph/version"
|
4
|
+
|
5
|
+
# Gem specification for orph. The version comes from Orph::VERSION and the
# packaged file lists are taken from the files tracked by git.
Gem::Specification.new do |s|
  s.name        = "orph"
  s.version     = Orph::VERSION
  s.authors     = ["David Eisinger"]
  s.email       = ["david.eisinger@gmail.com"]
  s.homepage    = "" # TODO: set the project URL
  s.summary     = %q{Orph is a library for removing typographic orphans}
  # Written as one logical line so the published description carries no
  # embedded newlines or source indentation.
  s.description = "Orphans (commonly referred to as 'widows') are " \
                  "single-word lines at the end of paragraphs. This " \
                  "library removes them with non-breaking spaces."

  s.rubyforge_project = "orph"

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  # Runtime: HTML parsing and serialization.
  s.add_dependency "nokogiri"

  # Development: test framework used by test/orph_test.rb.
  s.add_development_dependency "riot"
end
|
data/test/orph_test.rb
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
require "riot"
|
2
|
+
require "orph"
|
3
|
+
|
4
|
+
# Specs for Orph#fix. Every context runs a piece of HTML through a fresh Orph
# and checks where the non-breaking space entity ends up in the output.
context "Orph, removing orphans" do
  setup { Orph.new }

  context "with a single HTML tag" do
    setup { topic.fix("<p>these are words</p>") }
    asserts_topic.equals("<p>these are&nbsp;words</p>")
  end

  context "with multiple HTML tags" do
    setup { topic.fix("<p>one word</p><p>two words</p>") }
    asserts_topic.equals("<p>one&nbsp;word</p><p>two&nbsp;words</p>")
  end

  # The last text node (".") holds no space and neither does the <strong>,
  # so the space that gets bound is the one trailing "This is ".
  context "with a non-block-level nested tag" do
    setup { topic.fix "<p>This is <strong>important</strong>.</p>" }
    asserts_topic.equals("<p>This is&nbsp;<strong>important</strong>.</p>")
  end

  context "with nested HTML tags" do
    setup { topic.fix("<ul><li>one word</li><li>two words</li></ul>").gsub(/\n/, '') }
    asserts_topic.equals("<ul><li>one&nbsp;word</li><li>two&nbsp;words</li></ul>")
  end

  # Spaces that belong to the markup itself (attributes) must be left alone.
  context "with a space inside an HTML tag" do
    setup { topic.fix('<p><a href="http://google.com/">Google</a></p>') }
    asserts_topic.equals('<p><a href="http://google.com/">Google</a></p>')
  end

  context "with text and a short link" do
    setup { topic.fix('<p>This is a <a href="#">link</a></p>') }
    asserts_topic.equals('<p>This is a&nbsp;<a href="#">link</a></p>')
  end

  context "with text, a long link, and ending text" do
    setup { topic.fix('<p>This is a <a href="#">long link</a>.</p>') }
    asserts_topic.equals('<p>This is a <a href="#">long&nbsp;link</a>.</p>')
  end

  context "with paragraph consisting of two links" do
    setup { topic.fix('<p><a href="#">link one</a><a href="#">link two</a></p>') }
    asserts_topic.equals('<p><a href="#">link one</a><a href="#">link&nbsp;two</a></p>')
  end

  # Text sitting directly inside a non-content tag (the <div>) is not touched.
  context "with a div with two paragraphs and free text" do
    setup do
      html = '<div>some text<p>paragraph 1</p><p>paragraph 2</p></div>'
      # Strip newlines and two-space indentation only: single spaces inside
      # the text must survive for the "some text" assertion below.
      topic.fix(html).gsub(/(  |\n)/, "")
    end

    asserts_topic.includes("some text")
    asserts_topic.includes("<p>paragraph&nbsp;1</p>")
    asserts_topic.includes("<p>paragraph&nbsp;2</p>")
  end

  # An <li> that only wraps <p>s is not a content block itself; each <p> is.
  context "with a UL containing paragraphs" do
    setup do
      html = <<-HTML
        <div>
          <ul>
            <li>some text</li>
            <li>
              <p>par. 1</p>
              <p>par. 2</p>
            </li>
          </ul>
        </div>
      HTML

      # Strip newlines and two-space indentation, keeping single spaces.
      topic.fix(html).gsub(/(  |\n)/, "")
    end

    asserts_topic.includes("<li>some&nbsp;text</li>")
    asserts_topic.includes("<li><p>par.&nbsp;1</p><p>par.&nbsp;2</p></li>")
  end

  # Once "span" is a content tag, the <p> only wraps content tags and every
  # <span> is fixed on its own.
  context "with span as a content container" do
    setup do
      topic.content_tags << "span"
      topic.fix("<p><span>span 1</span><span>span 2</span></p>")
    end

    asserts_topic.includes("<span>span&nbsp;1</span>")
    asserts_topic.includes("<span>span&nbsp;2</span>")
  end
end
|
89
|
+
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: orph
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- David Eisinger
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-10-03 00:00:00.000000000 -04:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
requirement: &2153291960 !ruby/object:Gem::Requirement
|
18
|
+
none: false
|
19
|
+
requirements:
|
20
|
+
- - ! '>='
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: *2153291960
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: riot
|
28
|
+
requirement: &2153291540 !ruby/object:Gem::Requirement
|
29
|
+
none: false
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: *2153291540
|
37
|
+
description: ! "Orphans (commonly referred to as 'widows') are\n single-word
|
38
|
+
lines at the end of paragraphs. This\n library removes them
|
39
|
+
with non-breaking spaces."
|
40
|
+
email:
|
41
|
+
- david.eisinger@gmail.com
|
42
|
+
executables: []
|
43
|
+
extensions: []
|
44
|
+
extra_rdoc_files: []
|
45
|
+
files:
|
46
|
+
- .gitignore
|
47
|
+
- Gemfile
|
48
|
+
- README.md
|
49
|
+
- Rakefile
|
50
|
+
- lib/orph.rb
|
51
|
+
- lib/orph/version.rb
|
52
|
+
- orph.gemspec
|
53
|
+
- test/orph_test.rb
|
54
|
+
has_rdoc: true
|
55
|
+
homepage: ''
|
56
|
+
licenses: []
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options: []
|
59
|
+
require_paths:
|
60
|
+
- lib
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ! '>='
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0'
|
73
|
+
requirements: []
|
74
|
+
rubyforge_project: orph
|
75
|
+
rubygems_version: 1.6.2
|
76
|
+
signing_key:
|
77
|
+
specification_version: 3
|
78
|
+
summary: Orph is a library for removing typographic orphans
|
79
|
+
test_files:
|
80
|
+
- test/orph_test.rb
|