html-pipeline 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +5 -0
- data/README.md +3 -3
- data/lib/html/pipeline/toc_filter.rb +26 -5
- data/lib/html/pipeline/version.rb +1 -1
- data/test/html/pipeline/toc_filter_test.rb +72 -9
- metadata +20 -20
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -96,7 +96,7 @@ filter.call
|
|
|
96
96
|
* `SanitizationFilter` - whitelist sanitize user markup
|
|
97
97
|
* `SyntaxHighlightFilter` - [code syntax highlighter](#syntax-highlighting)
|
|
98
98
|
* `TextileFilter` - convert textile to html
|
|
99
|
-
* `TableOfContentsFilter` - anchor headings with name attributes
|
|
99
|
+
* `TableOfContentsFilter` - anchor headings with name attributes and generate a Table of Contents as an HTML unordered list linking to the headings
|
|
100
100
|
|
|
101
101
|
## Syntax highlighting
|
|
102
102
|
|
|
@@ -127,7 +127,7 @@ context = {
|
|
|
127
127
|
# related features.
|
|
128
128
|
SimplePipeline = Pipeline.new [
|
|
129
129
|
SanitizationFilter,
|
|
130
|
-
TableOfContentsFilter, # add 'name' anchors to all headers
|
|
130
|
+
TableOfContentsFilter, # add 'name' anchors to all headers and generate toc list
|
|
131
131
|
CamoFilter,
|
|
132
132
|
ImageMaxWidthFilter,
|
|
133
133
|
SyntaxHighlightFilter,
|
|
@@ -160,7 +160,7 @@ HtmlEmailPipeline = Pipeline.new [
|
|
|
160
160
|
|
|
161
161
|
# Just emoji.
|
|
162
162
|
EmojiPipeline = Pipeline.new [
|
|
163
|
-
|
|
163
|
+
PlainTextInputFilter,
|
|
164
164
|
EmojiFilter
|
|
165
165
|
], context
|
|
166
166
|
```
|
|
@@ -1,27 +1,48 @@
|
|
|
1
1
|
module HTML
|
|
2
2
|
class Pipeline
|
|
3
3
|
# HTML filter that adds a 'name' attribute to all headers
|
|
4
|
-
# in a document, so they can be accessed from a table of contents
|
|
4
|
+
# in a document, so they can be accessed from a table of contents.
|
|
5
5
|
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
6
|
+
# Generates the Table of Contents, with links to each header.
|
|
7
|
+
#
|
|
8
|
+
# Examples
|
|
9
|
+
#
|
|
10
|
+
# TocPipeline =
|
|
11
|
+
# HTML::Pipeline.new [
|
|
12
|
+
# HTML::Pipeline::TableOfContentsFilter
|
|
13
|
+
# ]
|
|
14
|
+
# # => #<HTML::Pipeline:0x007fc13c4528d8...>
|
|
15
|
+
# orig = %(<h1>Ice cube</h1><p>is not for the pop chart</p>)
|
|
16
|
+
# # => "<h1>Ice cube</h1><p>is not for the pop chart</p>"
|
|
17
|
+
# result = {}
|
|
18
|
+
# # => {}
|
|
19
|
+
# TocPipeline.call(orig, {}, result)
|
|
20
|
+
# # => {:toc=> ...}
|
|
21
|
+
# result[:toc]
|
|
22
|
+
# # => "<ul class=\"section-nav\">\n<li><a href=\"#ice-cube\">...</li>\n</ul>"
|
|
23
|
+
# result[:output].to_s
|
|
24
|
+
# # => "<h1>\n<a name=\"ice-cube\" class=\"anchor\" href=\"#ice-cube\">..."
|
|
9
25
|
class TableOfContentsFilter < Filter
|
|
10
26
|
PUNCTUATION_REGEXP = RUBY_VERSION > "1.9" ? /[^\p{Word}\- ]/u : /[^\w\- ]/
|
|
11
27
|
|
|
12
28
|
def call
|
|
29
|
+
result[:toc] = ""
|
|
30
|
+
|
|
13
31
|
headers = Hash.new(0)
|
|
14
32
|
doc.css('h1, h2, h3, h4, h5, h6').each do |node|
|
|
15
|
-
|
|
33
|
+
text = node.text
|
|
34
|
+
name = text.downcase
|
|
16
35
|
name.gsub!(PUNCTUATION_REGEXP, '') # remove punctuation
|
|
17
36
|
name.gsub!(' ', '-') # replace spaces with dash
|
|
18
37
|
|
|
19
38
|
uniq = (headers[name] > 0) ? "-#{headers[name]}" : ''
|
|
20
39
|
headers[name] += 1
|
|
21
40
|
if header_content = node.children.first
|
|
41
|
+
result[:toc] << %Q{<li><a href="##{name}#{uniq}">#{text}</a></li>\n}
|
|
22
42
|
header_content.add_previous_sibling(%Q{<a name="#{name}#{uniq}" class="anchor" href="##{name}#{uniq}"><span class="octicon octicon-link"></span></a>})
|
|
23
43
|
end
|
|
24
44
|
end
|
|
45
|
+
result[:toc] = %Q{<ul class="section-nav">\n#{result[:toc]}</ul>} unless result[:toc].empty?
|
|
25
46
|
doc
|
|
26
47
|
end
|
|
27
48
|
end
|
|
@@ -4,11 +4,27 @@ require "test_helper"
|
|
|
4
4
|
class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase
|
|
5
5
|
TocFilter = HTML::Pipeline::TableOfContentsFilter
|
|
6
6
|
|
|
7
|
+
TocPipeline =
|
|
8
|
+
HTML::Pipeline.new [
|
|
9
|
+
HTML::Pipeline::TableOfContentsFilter
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
def toc
|
|
13
|
+
result = {}
|
|
14
|
+
TocPipeline.call(@orig, {}, result)
|
|
15
|
+
result[:toc]
|
|
16
|
+
end
|
|
17
|
+
|
|
7
18
|
def test_anchors_are_added_properly
|
|
8
19
|
orig = %(<h1>Ice cube</h1><p>Will swarm on any motherfucker in a blue uniform</p>)
|
|
9
20
|
assert_includes '<a name=', TocFilter.call(orig).to_s
|
|
10
21
|
end
|
|
11
22
|
|
|
23
|
+
def test_toc_list_added_properly
|
|
24
|
+
@orig = %(<h1>Ice cube</h1><p>Will swarm on any motherfucker in a blue uniform</p>)
|
|
25
|
+
assert_includes %Q{<ul class="section-nav">\n<li><a href="}, toc
|
|
26
|
+
end
|
|
27
|
+
|
|
12
28
|
def test_anchors_have_sane_names
|
|
13
29
|
orig = %(<h1>Dr Dre</h1><h1>Ice Cube</h1><h1>Eazy-E</h1><h1>MC Ren</h1>)
|
|
14
30
|
result = TocFilter.call(orig).to_s
|
|
@@ -19,6 +35,14 @@ class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase
|
|
|
19
35
|
assert_includes '"mc-ren"', result
|
|
20
36
|
end
|
|
21
37
|
|
|
38
|
+
def test_toc_hrefs_have_sane_values
|
|
39
|
+
@orig = %(<h1>Dr Dre</h1><h1>Ice Cube</h1><h1>Eazy-E</h1><h1>MC Ren</h1>)
|
|
40
|
+
assert_includes '"#dr-dre"', toc
|
|
41
|
+
assert_includes '"#ice-cube"', toc
|
|
42
|
+
assert_includes '"#eazy-e"', toc
|
|
43
|
+
assert_includes '"#mc-ren"', toc
|
|
44
|
+
end
|
|
45
|
+
|
|
22
46
|
def test_dupe_headers_have_unique_trailing_identifiers
|
|
23
47
|
orig = %(<h1>Straight Outta Compton</h1>
|
|
24
48
|
<h2>Dopeman</h2>
|
|
@@ -31,6 +55,16 @@ class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase
|
|
|
31
55
|
assert_includes '"dopeman-1"', result
|
|
32
56
|
end
|
|
33
57
|
|
|
58
|
+
def test_dupe_headers_have_unique_toc_anchors
|
|
59
|
+
@orig = %(<h1>Straight Outta Compton</h1>
|
|
60
|
+
<h2>Dopeman</h2>
|
|
61
|
+
<h3>Express Yourself</h3>
|
|
62
|
+
<h1>Dopeman</h1>)
|
|
63
|
+
|
|
64
|
+
assert_includes '"#dopeman"', toc
|
|
65
|
+
assert_includes '"#dopeman-1"', toc
|
|
66
|
+
end
|
|
67
|
+
|
|
34
68
|
def test_all_header_tags_are_found_when_adding_anchors
|
|
35
69
|
orig = %(<h1>"Funky President" by James Brown</h1>
|
|
36
70
|
<h2>"It's My Thing" by Marva Whitney</h2>
|
|
@@ -41,18 +75,47 @@ class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase
|
|
|
41
75
|
<h7>"Be Thankful for What You Got" by William DeVaughn</h7>)
|
|
42
76
|
|
|
43
77
|
doc = TocFilter.call(orig)
|
|
78
|
+
|
|
44
79
|
assert_equal 6, doc.search('a').size
|
|
45
80
|
end
|
|
46
81
|
|
|
47
|
-
def
|
|
48
|
-
orig = %(<h1
|
|
49
|
-
|
|
82
|
+
def test_toc_is_complete
|
|
83
|
+
@orig = %(<h1>"Funky President" by James Brown</h1>
|
|
84
|
+
<h2>"It's My Thing" by Marva Whitney</h2>
|
|
85
|
+
<h3>"Boogie Back" by Roy Ayers</h3>
|
|
86
|
+
<h4>"Feel Good" by Fancy</h4>
|
|
87
|
+
<h5>"Funky Drummer" by James Brown</h5>
|
|
88
|
+
<h6>"Ruthless Villain" by Eazy-E</h6>
|
|
89
|
+
<h7>"Be Thankful for What You Got" by William DeVaughn</h7>)
|
|
50
90
|
|
|
51
|
-
|
|
91
|
+
expected = %Q{<ul class="section-nav">\n<li><a href="#funky-president-by-james-brown">"Funky President" by James Brown</a></li>\n<li><a href="#its-my-thing-by-marva-whitney">"It's My Thing" by Marva Whitney</a></li>\n<li><a href="#boogie-back-by-roy-ayers">"Boogie Back" by Roy Ayers</a></li>\n<li><a href="#feel-good-by-fancy">"Feel Good" by Fancy</a></li>\n<li><a href="#funky-drummer-by-james-brown">"Funky Drummer" by James Brown</a></li>\n<li><a href="#ruthless-villain-by-eazy-e">"Ruthless Villain" by Eazy-E</a></li>\n</ul>}
|
|
52
92
|
|
|
53
|
-
assert_equal
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
93
|
+
assert_equal expected, toc
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
if RUBY_VERSION > "1.9" # not sure how to make this work on 1.8.7
|
|
97
|
+
|
|
98
|
+
def test_anchors_with_utf8_characters
|
|
99
|
+
orig = %(<h1>日本語</h1>
|
|
100
|
+
<h1>Русский</h1)
|
|
101
|
+
|
|
102
|
+
rendered_h1s = TocFilter.call(orig).search('h1').map(&:to_s)
|
|
103
|
+
|
|
104
|
+
assert_equal "<h1>\n<a name=\"%E6%97%A5%E6%9C%AC%E8%AA%9E\" class=\"anchor\" href=\"#%E6%97%A5%E6%9C%AC%E8%AA%9E\"><span class=\"octicon octicon-link\"></span></a>日本語</h1>",
|
|
105
|
+
rendered_h1s[0]
|
|
106
|
+
assert_equal "<h1>\n<a name=\"%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9\" class=\"anchor\" href=\"#%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9\"><span class=\"octicon octicon-link\"></span></a>Русский</h1>",
|
|
107
|
+
rendered_h1s[1]
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def test_toc_with_utf8_characters
|
|
111
|
+
@orig = %(<h1>日本語</h1>
|
|
112
|
+
<h1>Русский</h1)
|
|
113
|
+
|
|
114
|
+
rendered_toc = Nokogiri::HTML::DocumentFragment.parse(toc).to_s
|
|
115
|
+
|
|
116
|
+
expected = %Q{<ul class="section-nav">\n<li><a href="#%E6%97%A5%E6%9C%AC%E8%AA%9E">日本語</a></li>\n<li><a href="#%D0%A0%D1%83%D1%81%D1%81%D0%BA%D0%B8%D0%B9">Русский</a></li>\n</ul>}
|
|
117
|
+
|
|
118
|
+
assert_equal expected, rendered_toc
|
|
119
|
+
end
|
|
120
|
+
end
|
|
58
121
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: html-pipeline
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
hash:
|
|
4
|
+
hash: 23
|
|
5
5
|
prerelease:
|
|
6
6
|
segments:
|
|
7
7
|
- 0
|
|
8
|
-
-
|
|
8
|
+
- 2
|
|
9
9
|
- 0
|
|
10
|
-
version: 0.
|
|
10
|
+
version: 0.2.0
|
|
11
11
|
platform: ruby
|
|
12
12
|
authors:
|
|
13
13
|
- Ryan Tomayko
|
|
@@ -16,12 +16,10 @@ autorequire:
|
|
|
16
16
|
bindir: bin
|
|
17
17
|
cert_chain: []
|
|
18
18
|
|
|
19
|
-
date: 2013-07-
|
|
19
|
+
date: 2013-07-31 00:00:00 -07:00
|
|
20
20
|
default_executable:
|
|
21
21
|
dependencies:
|
|
22
22
|
- !ruby/object:Gem::Dependency
|
|
23
|
-
name: gemoji
|
|
24
|
-
prerelease: false
|
|
25
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
|
26
24
|
none: false
|
|
27
25
|
requirements:
|
|
@@ -33,10 +31,10 @@ dependencies:
|
|
|
33
31
|
- 0
|
|
34
32
|
version: "1.0"
|
|
35
33
|
type: :runtime
|
|
34
|
+
name: gemoji
|
|
36
35
|
version_requirements: *id001
|
|
37
|
-
- !ruby/object:Gem::Dependency
|
|
38
|
-
name: nokogiri
|
|
39
36
|
prerelease: false
|
|
37
|
+
- !ruby/object:Gem::Dependency
|
|
40
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
|
41
39
|
none: false
|
|
42
40
|
requirements:
|
|
@@ -55,10 +53,10 @@ dependencies:
|
|
|
55
53
|
- 6
|
|
56
54
|
version: "1.6"
|
|
57
55
|
type: :runtime
|
|
56
|
+
name: nokogiri
|
|
58
57
|
version_requirements: *id002
|
|
59
|
-
- !ruby/object:Gem::Dependency
|
|
60
|
-
name: github-markdown
|
|
61
58
|
prerelease: false
|
|
59
|
+
- !ruby/object:Gem::Dependency
|
|
62
60
|
requirement: &id003 !ruby/object:Gem::Requirement
|
|
63
61
|
none: false
|
|
64
62
|
requirements:
|
|
@@ -70,10 +68,10 @@ dependencies:
|
|
|
70
68
|
- 5
|
|
71
69
|
version: "0.5"
|
|
72
70
|
type: :runtime
|
|
71
|
+
name: github-markdown
|
|
73
72
|
version_requirements: *id003
|
|
74
|
-
- !ruby/object:Gem::Dependency
|
|
75
|
-
name: sanitize
|
|
76
73
|
prerelease: false
|
|
74
|
+
- !ruby/object:Gem::Dependency
|
|
77
75
|
requirement: &id004 !ruby/object:Gem::Requirement
|
|
78
76
|
none: false
|
|
79
77
|
requirements:
|
|
@@ -92,10 +90,10 @@ dependencies:
|
|
|
92
90
|
- 4
|
|
93
91
|
version: 2.0.4
|
|
94
92
|
type: :runtime
|
|
93
|
+
name: sanitize
|
|
95
94
|
version_requirements: *id004
|
|
96
|
-
- !ruby/object:Gem::Dependency
|
|
97
|
-
name: rinku
|
|
98
95
|
prerelease: false
|
|
96
|
+
- !ruby/object:Gem::Dependency
|
|
99
97
|
requirement: &id005 !ruby/object:Gem::Requirement
|
|
100
98
|
none: false
|
|
101
99
|
requirements:
|
|
@@ -107,10 +105,10 @@ dependencies:
|
|
|
107
105
|
- 7
|
|
108
106
|
version: "1.7"
|
|
109
107
|
type: :runtime
|
|
108
|
+
name: rinku
|
|
110
109
|
version_requirements: *id005
|
|
111
|
-
- !ruby/object:Gem::Dependency
|
|
112
|
-
name: escape_utils
|
|
113
110
|
prerelease: false
|
|
111
|
+
- !ruby/object:Gem::Dependency
|
|
114
112
|
requirement: &id006 !ruby/object:Gem::Requirement
|
|
115
113
|
none: false
|
|
116
114
|
requirements:
|
|
@@ -122,10 +120,10 @@ dependencies:
|
|
|
122
120
|
- 3
|
|
123
121
|
version: "0.3"
|
|
124
122
|
type: :runtime
|
|
123
|
+
name: escape_utils
|
|
125
124
|
version_requirements: *id006
|
|
126
|
-
- !ruby/object:Gem::Dependency
|
|
127
|
-
name: activesupport
|
|
128
125
|
prerelease: false
|
|
126
|
+
- !ruby/object:Gem::Dependency
|
|
129
127
|
requirement: &id007 !ruby/object:Gem::Requirement
|
|
130
128
|
none: false
|
|
131
129
|
requirements:
|
|
@@ -142,10 +140,10 @@ dependencies:
|
|
|
142
140
|
- 4
|
|
143
141
|
version: "4"
|
|
144
142
|
type: :runtime
|
|
143
|
+
name: activesupport
|
|
145
144
|
version_requirements: *id007
|
|
146
|
-
- !ruby/object:Gem::Dependency
|
|
147
|
-
name: github-linguist
|
|
148
145
|
prerelease: false
|
|
146
|
+
- !ruby/object:Gem::Dependency
|
|
149
147
|
requirement: &id008 !ruby/object:Gem::Requirement
|
|
150
148
|
none: false
|
|
151
149
|
requirements:
|
|
@@ -158,7 +156,9 @@ dependencies:
|
|
|
158
156
|
- 2
|
|
159
157
|
version: 2.6.2
|
|
160
158
|
type: :development
|
|
159
|
+
name: github-linguist
|
|
161
160
|
version_requirements: *id008
|
|
161
|
+
prerelease: false
|
|
162
162
|
description: GitHub HTML processing filters and utilities
|
|
163
163
|
email:
|
|
164
164
|
- ryan@github.com
|