html-pipeline 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +5 -0
- data/README.md +3 -3
- data/lib/html/pipeline/toc_filter.rb +26 -5
- data/lib/html/pipeline/version.rb +1 -1
- data/test/html/pipeline/toc_filter_test.rb +72 -9
- metadata +20 -20
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -96,7 +96,7 @@ filter.call
|
|
96
96
|
* `SanitizationFilter` - whitelist sanitize user markup
|
97
97
|
* `SyntaxHighlightFilter` - [code syntax highlighter](#syntax-highlighting)
|
98
98
|
* `TextileFilter` - convert textile to html
|
99
|
-
* `TableOfContentsFilter` - anchor headings with name attributes
|
99
|
+
* `TableOfContentsFilter` - anchor headings with name attributes and generate Table of Contents html unordered list linking headings
|
100
100
|
|
101
101
|
## Syntax highlighting
|
102
102
|
|
@@ -127,7 +127,7 @@ context = {
|
|
127
127
|
# related features.
|
128
128
|
SimplePipeline = Pipeline.new [
|
129
129
|
SanitizationFilter,
|
130
|
-
TableOfContentsFilter, # add 'name' anchors to all headers
|
130
|
+
TableOfContentsFilter, # add 'name' anchors to all headers and generate toc list
|
131
131
|
CamoFilter,
|
132
132
|
ImageMaxWidthFilter,
|
133
133
|
SyntaxHighlightFilter,
|
@@ -160,7 +160,7 @@ HtmlEmailPipeline = Pipeline.new [
|
|
160
160
|
|
161
161
|
# Just emoji.
|
162
162
|
EmojiPipeline = Pipeline.new [
|
163
|
-
|
163
|
+
PlainTextInputFilter,
|
164
164
|
EmojiFilter
|
165
165
|
], context
|
166
166
|
```
|
@@ -1,27 +1,48 @@
|
|
1
1
|
module HTML
|
2
2
|
class Pipeline
|
3
3
|
# HTML filter that adds a 'name' attribute to all headers
|
4
|
-
# in a document, so they can be accessed from a table of contents
|
4
|
+
# in a document, so they can be accessed from a table of contents.
|
5
5
|
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Generates the Table of Contents, with links to each header.
|
7
|
+
#
|
8
|
+
# Examples
|
9
|
+
#
|
10
|
+
# TocPipeline =
|
11
|
+
# HTML::Pipeline.new [
|
12
|
+
# HTML::Pipeline::TableOfContentsFilter
|
13
|
+
# ]
|
14
|
+
# # => #<HTML::Pipeline:0x007fc13c4528d8...>
|
15
|
+
# orig = %(<h1>Ice cube</h1><p>is not for the pop chart</p>)
|
16
|
+
# # => "<h1>Ice cube</h1><p>is not for the pop chart</p>"
|
17
|
+
# result = {}
|
18
|
+
# # => {}
|
19
|
+
# TocPipeline.call(orig, {}, result)
|
20
|
+
# # => {:toc=> ...}
|
21
|
+
# result[:toc]
|
22
|
+
# # => "<ul class=\"section-nav\">\n<li><a href=\"#ice-cube\">...</li></ul>"
|
23
|
+
# result[:output].to_s
|
24
|
+
# # => "<h1>\n<a name=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..."
|
9
25
|
class TableOfContentsFilter < Filter
|
10
26
|
PUNCTUATION_REGEXP = RUBY_VERSION > "1.9" ? /[^\p{Word}\- ]/u : /[^\w\- ]/
|
11
27
|
|
12
28
|
def call
|
29
|
+
result[:toc] = ""
|
30
|
+
|
13
31
|
headers = Hash.new(0)
|
14
32
|
doc.css('h1, h2, h3, h4, h5, h6').each do |node|
|
15
|
-
|
33
|
+
text = node.text
|
34
|
+
name = text.downcase
|
16
35
|
name.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
|
17
36
|
name.gsub!(' ', '-') # replace spaces with dash
|
18
37
|
|
19
38
|
uniq = (headers[name] > 0) ? "-#{headers[name]}" : ''
|
20
39
|
headers[name] += 1
|
21
40
|
if header_content = node.children.first
|
41
|
+
result[:toc] << %Q{<li><a href="##{name}#{uniq}">#{text}</a></li>\n}
|
22
42
|
header_content.add_previous_sibling(%Q{<a name="#{name}#{uniq}" class="anchor" href="##{name}#{uniq}"><span class="octicon octicon-link"></span></a>})
|
23
43
|
end
|
24
44
|
end
|
45
|
+
result[:toc] = %Q{<ul class="section-nav">\n#{result[:toc]}</ul>} unless result[:toc].empty?
|
25
46
|
doc
|
26
47
|
end
|
27
48
|
end
|
@@ -4,11 +4,27 @@ require "test_helper"
|
|
4
4
|
class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase
|
5
5
|
TocFilter = HTML::Pipeline::TableOfContentsFilter
|
6
6
|
|
7
|
+
TocPipeline =
|
8
|
+
HTML::Pipeline.new [
|
9
|
+
HTML::Pipeline::TableOfContentsFilter
|
10
|
+
]
|
11
|
+
|
12
|
+
def toc
|
13
|
+
result = {}
|
14
|
+
TocPipeline.call(@orig, {}, result)
|
15
|
+
result[:toc]
|
16
|
+
end
|
17
|
+
|
7
18
|
def test_anchors_are_added_properly
|
8
19
|
orig = %(<h1>Ice cube</h1><p>Will swarm on any motherfucker in a blue uniform</p>)
|
9
20
|
assert_includes '<a name=', TocFilter.call(orig).to_s
|
10
21
|
end
|
11
22
|
|
23
|
+
def test_toc_list_added_properly
|
24
|
+
@orig = %(<h1>Ice cube</h1><p>Will swarm on any motherfucker in a blue uniform</p>)
|
25
|
+
assert_includes %Q{<ul class="section-nav">\n<li><a href="}, toc
|
26
|
+
end
|
27
|
+
|
12
28
|
def test_anchors_have_sane_names
|
13
29
|
orig = %(<h1>Dr Dre</h1><h1>Ice Cube</h1><h1>Eazy-E</h1><h1>MC Ren</h1>)
|
14
30
|
result = TocFilter.call(orig).to_s
|
@@ -19,6 +35,14 @@ class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase
|
|
19
35
|
assert_includes '"mc-ren"', result
|
20
36
|
end
|
21
37
|
|
38
|
+
def test_toc_hrefs_have_sane_values
|
39
|
+
@orig = %(<h1>Dr Dre</h1><h1>Ice Cube</h1><h1>Eazy-E</h1><h1>MC Ren</h1>)
|
40
|
+
assert_includes '"#dr-dre"', toc
|
41
|
+
assert_includes '"#ice-cube"', toc
|
42
|
+
assert_includes '"#eazy-e"', toc
|
43
|
+
assert_includes '"#mc-ren"', toc
|
44
|
+
end
|
45
|
+
|
22
46
|
def test_dupe_headers_have_unique_trailing_identifiers
|
23
47
|
orig = %(<h1>Straight Outta Compton</h1>
|
24
48
|
<h2>Dopeman</h2>
|
@@ -31,6 +55,16 @@ class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase
|
|
31
55
|
assert_includes '"dopeman-1"', result
|
32
56
|
end
|
33
57
|
|
58
|
+
def test_dupe_headers_have_unique_toc_anchors
|
59
|
+
@orig = %(<h1>Straight Outta Compton</h1>
|
60
|
+
<h2>Dopeman</h2>
|
61
|
+
<h3>Express Yourself</h3>
|
62
|
+
<h1>Dopeman</h1>)
|
63
|
+
|
64
|
+
assert_includes '"#dopeman"', toc
|
65
|
+
assert_includes '"#dopeman-1"', toc
|
66
|
+
end
|
67
|
+
|
34
68
|
def test_all_header_tags_are_found_when_adding_anchors
|
35
69
|
orig = %(<h1>"Funky President" by James Brown</h1>
|
36
70
|
<h2>"It's My Thing" by Marva Whitney</h2>
|
@@ -41,18 +75,47 @@ class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase
|
|
41
75
|
<h7>"Be Thankful for What You Got" by William DeVaughn</h7>)
|
42
76
|
|
43
77
|
doc = TocFilter.call(orig)
|
78
|
+
|
44
79
|
assert_equal 6, doc.search('a').size
|
45
80
|
end
|
46
81
|
|
47
|
-
def
|
48
|
-
orig = %(<h1
|
49
|
-
|
82
|
+
def test_toc_is_complete
|
83
|
+
@orig = %(<h1>"Funky President" by James Brown</h1>
|
84
|
+
<h2>"It's My Thing" by Marva Whitney</h2>
|
85
|
+
<h3>"Boogie Back" by Roy Ayers</h3>
|
86
|
+
<h4>"Feel Good" by Fancy</h4>
|
87
|
+
<h5>"Funky Drummer" by James Brown</h5>
|
88
|
+
<h6>"Ruthless Villain" by Eazy-E</h6>
|
89
|
+
<h7>"Be Thankful for What You Got" by William DeVaughn</h7>)
|
50
90
|
|
51
|
-
|
91
|
+
expected = %Q{<ul class="section-nav">\n<li><a href="#funky-president-by-james-brown">"Funky President" by James Brown</a></li>\n<li><a href="#its-my-thing-by-marva-whitney">"It's My Thing" by Marva Whitney</a></li>\n<li><a href="#boogie-back-by-roy-ayers">"Boogie Back" by Roy Ayers</a></li>\n<li><a href="#feel-good-by-fancy">"Feel Good" by Fancy</a></li>\n<li><a href="#funky-drummer-by-james-brown">"Funky Drummer" by James Brown</a></li>\n<li><a href="#ruthless-villain-by-eazy-e">"Ruthless Villain" by Eazy-E</a></li>\n</ul>}
|
52
92
|
|
53
|
-
assert_equal
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
93
|
+
assert_equal expected, toc
|
94
|
+
end
|
95
|
+
|
96
|
+
if RUBY_VERSION > "1.9" # not sure how to make this work on 1.8.7
|
97
|
+
|
98
|
+
def test_anchors_with_utf8_characters
|
99
|
+
orig = %(<h1>日本語</h1>
|
100
|
+
<h1>Русский</h1)
|
101
|
+
|
102
|
+
rendered_h1s = TocFilter.call(orig).search('h1').map(&:to_s)
|
103
|
+
|
104
|
+
assert_equal "<h1>\n<a name=\"%E6%97%A5%E6%9C%AC%E8%AA%9E\" class=\"anchor\" href=\"#%E6%97%A5%E6%9C%AC%E8%AA%9E\"><span class=\"octicon octicon-link\"></span></a>日本語</h1>",
|
105
|
+
rendered_h1s[0]
|
106
|
+
assert_equal "<h1>\n<a name=\"%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9\" class=\"anchor\" href=\"#%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9\"><span class=\"octicon octicon-link\"></span></a>Русский</h1>",
|
107
|
+
rendered_h1s[1]
|
108
|
+
end
|
109
|
+
|
110
|
+
def test_toc_with_utf8_characters
|
111
|
+
@orig = %(<h1>日本語</h1>
|
112
|
+
<h1>Русский</h1)
|
113
|
+
|
114
|
+
rendered_toc = Nokogiri::HTML::DocumentFragment.parse(toc).to_s
|
115
|
+
|
116
|
+
expected = %Q{<ul class="section-nav">\n<li><a href="#%E6%97%A5%E6%9C%AC%E8%AA%9E">日本語</a></li>\n<li><a href="#%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9">Русский</a></li>\n</ul>}
|
117
|
+
|
118
|
+
assert_equal expected, rendered_toc
|
119
|
+
end
|
120
|
+
end
|
58
121
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-pipeline
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
8
|
+
- 2
|
9
9
|
- 0
|
10
|
-
version: 0.1.0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Ryan Tomayko
|
@@ -16,12 +16,10 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2013-07-
|
19
|
+
date: 2013-07-31 00:00:00 -07:00
|
20
20
|
default_executable:
|
21
21
|
dependencies:
|
22
22
|
- !ruby/object:Gem::Dependency
|
23
|
-
name: gemoji
|
24
|
-
prerelease: false
|
25
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
26
24
|
none: false
|
27
25
|
requirements:
|
@@ -33,10 +31,10 @@ dependencies:
|
|
33
31
|
- 0
|
34
32
|
version: "1.0"
|
35
33
|
type: :runtime
|
34
|
+
name: gemoji
|
36
35
|
version_requirements: *id001
|
37
|
-
- !ruby/object:Gem::Dependency
|
38
|
-
name: nokogiri
|
39
36
|
prerelease: false
|
37
|
+
- !ruby/object:Gem::Dependency
|
40
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
41
39
|
none: false
|
42
40
|
requirements:
|
@@ -55,10 +53,10 @@ dependencies:
|
|
55
53
|
- 6
|
56
54
|
version: "1.6"
|
57
55
|
type: :runtime
|
56
|
+
name: nokogiri
|
58
57
|
version_requirements: *id002
|
59
|
-
- !ruby/object:Gem::Dependency
|
60
|
-
name: github-markdown
|
61
58
|
prerelease: false
|
59
|
+
- !ruby/object:Gem::Dependency
|
62
60
|
requirement: &id003 !ruby/object:Gem::Requirement
|
63
61
|
none: false
|
64
62
|
requirements:
|
@@ -70,10 +68,10 @@ dependencies:
|
|
70
68
|
- 5
|
71
69
|
version: "0.5"
|
72
70
|
type: :runtime
|
71
|
+
name: github-markdown
|
73
72
|
version_requirements: *id003
|
74
|
-
- !ruby/object:Gem::Dependency
|
75
|
-
name: sanitize
|
76
73
|
prerelease: false
|
74
|
+
- !ruby/object:Gem::Dependency
|
77
75
|
requirement: &id004 !ruby/object:Gem::Requirement
|
78
76
|
none: false
|
79
77
|
requirements:
|
@@ -92,10 +90,10 @@ dependencies:
|
|
92
90
|
- 4
|
93
91
|
version: 2.0.4
|
94
92
|
type: :runtime
|
93
|
+
name: sanitize
|
95
94
|
version_requirements: *id004
|
96
|
-
- !ruby/object:Gem::Dependency
|
97
|
-
name: rinku
|
98
95
|
prerelease: false
|
96
|
+
- !ruby/object:Gem::Dependency
|
99
97
|
requirement: &id005 !ruby/object:Gem::Requirement
|
100
98
|
none: false
|
101
99
|
requirements:
|
@@ -107,10 +105,10 @@ dependencies:
|
|
107
105
|
- 7
|
108
106
|
version: "1.7"
|
109
107
|
type: :runtime
|
108
|
+
name: rinku
|
110
109
|
version_requirements: *id005
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: escape_utils
|
113
110
|
prerelease: false
|
111
|
+
- !ruby/object:Gem::Dependency
|
114
112
|
requirement: &id006 !ruby/object:Gem::Requirement
|
115
113
|
none: false
|
116
114
|
requirements:
|
@@ -122,10 +120,10 @@ dependencies:
|
|
122
120
|
- 3
|
123
121
|
version: "0.3"
|
124
122
|
type: :runtime
|
123
|
+
name: escape_utils
|
125
124
|
version_requirements: *id006
|
126
|
-
- !ruby/object:Gem::Dependency
|
127
|
-
name: activesupport
|
128
125
|
prerelease: false
|
126
|
+
- !ruby/object:Gem::Dependency
|
129
127
|
requirement: &id007 !ruby/object:Gem::Requirement
|
130
128
|
none: false
|
131
129
|
requirements:
|
@@ -142,10 +140,10 @@ dependencies:
|
|
142
140
|
- 4
|
143
141
|
version: "4"
|
144
142
|
type: :runtime
|
143
|
+
name: activesupport
|
145
144
|
version_requirements: *id007
|
146
|
-
- !ruby/object:Gem::Dependency
|
147
|
-
name: github-linguist
|
148
145
|
prerelease: false
|
146
|
+
- !ruby/object:Gem::Dependency
|
149
147
|
requirement: &id008 !ruby/object:Gem::Requirement
|
150
148
|
none: false
|
151
149
|
requirements:
|
@@ -158,7 +156,9 @@ dependencies:
|
|
158
156
|
- 2
|
159
157
|
version: 2.6.2
|
160
158
|
type: :development
|
159
|
+
name: github-linguist
|
161
160
|
version_requirements: *id008
|
161
|
+
prerelease: false
|
162
162
|
description: GitHub HTML processing filters and utilities
|
163
163
|
email:
|
164
164
|
- ryan@github.com
|