html-pipeline 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. data/.gitignore +19 -0
  2. data/.travis.yml +13 -0
  3. data/Gemfile +9 -0
  4. data/LICENSE +22 -0
  5. data/README.md +128 -0
  6. data/Rakefile +11 -0
  7. data/html-pipeline.gemspec +25 -0
  8. data/lib/html/pipeline.rb +130 -0
  9. data/lib/html/pipeline/@mention_filter.rb +118 -0
  10. data/lib/html/pipeline/autolink_filter.rb +22 -0
  11. data/lib/html/pipeline/body_content.rb +42 -0
  12. data/lib/html/pipeline/camo_filter.rb +64 -0
  13. data/lib/html/pipeline/email_reply_filter.rb +56 -0
  14. data/lib/html/pipeline/emoji_filter.rb +48 -0
  15. data/lib/html/pipeline/filter.rb +158 -0
  16. data/lib/html/pipeline/https_filter.rb +13 -0
  17. data/lib/html/pipeline/image_max_width_filter.rb +37 -0
  18. data/lib/html/pipeline/markdown_filter.rb +29 -0
  19. data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
  20. data/lib/html/pipeline/sanitization_filter.rb +107 -0
  21. data/lib/html/pipeline/syntax_highlight_filter.rb +29 -0
  22. data/lib/html/pipeline/text_filter.rb +14 -0
  23. data/lib/html/pipeline/textile_filter.rb +21 -0
  24. data/lib/html/pipeline/toc_filter.rb +28 -0
  25. data/lib/html/pipeline/version.rb +5 -0
  26. data/test/html/pipeline/autolink_filter_test.rb +22 -0
  27. data/test/html/pipeline/camo_filter_test.rb +39 -0
  28. data/test/html/pipeline/emoji_filter_test.rb +16 -0
  29. data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
  30. data/test/html/pipeline/markdown_filter_test.rb +101 -0
  31. data/test/html/pipeline/mention_filter_test.rb +158 -0
  32. data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
  33. data/test/html/pipeline/sanitization_filter_test.rb +47 -0
  34. data/test/html/pipeline/toc_filter_test.rb +47 -0
  35. data/test/test_helper.rb +38 -0
  36. metadata +221 -0
@@ -0,0 +1,158 @@
1
+ require "test_helper"
2
+
3
+ class HTML::Pipeline::MentionFilterTest < Test::Unit::TestCase
4
+ def filter(html, base_url='/', info_url=nil)
5
+ HTML::Pipeline::MentionFilter.call(html, :base_url => base_url, :info_url => info_url)
6
+ end
7
+
8
+ def test_filtering_a_documentfragment
9
+ body = "<p>@kneath: check it out.</p>"
10
+ doc = Nokogiri::HTML::DocumentFragment.parse(body)
11
+
12
+ res = filter(doc, '/')
13
+ assert_same doc, res
14
+
15
+ link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
16
+ assert_equal "<p>#{link}: check it out.</p>",
17
+ res.to_html
18
+ end
19
+
20
+ def test_filtering_plain_text
21
+ body = "<p>@kneath: check it out.</p>"
22
+ res = filter(body, '/')
23
+
24
+ link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
25
+ assert_equal "<p>#{link}: check it out.</p>",
26
+ res.to_html
27
+ end
28
+
29
+ def test_not_replacing_mentions_in_pre_tags
30
+ body = "<pre>@kneath: okay</pre>"
31
+ assert_equal body, filter(body).to_html
32
+ end
33
+
34
+ def test_not_replacing_mentions_in_code_tags
35
+ body = "<p><code>@kneath:</code> okay</p>"
36
+ assert_equal body, filter(body).to_html
37
+ end
38
+
39
+ def test_not_replacing_mentions_in_links
40
+ body = "<p><a>@kneath</a> okay</p>"
41
+ assert_equal body, filter(body).to_html
42
+ end
43
+
44
+ def test_entity_encoding_and_whatnot
45
+ body = "<p>@&#x6b;neath what's up</p>"
46
+ link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
47
+ assert_equal "<p>#{link} what's up</p>", filter(body, '/').to_html
48
+ end
49
+
50
+ def test_html_injection
51
+ body = "<p>@kneath &lt;script>alert(0)&lt;/script></p>"
52
+ link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
53
+ assert_equal "<p>#{link} &lt;script&gt;alert(0)&lt;/script&gt;</p>",
54
+ filter(body, '/').to_html
55
+ end
56
+
57
+ def test_links_to_nothing_when_no_info_url_given
58
+ body = "<p>How do I @mention someone?</p>"
59
+ assert_equal "<p>How do I @mention someone?</p>",
60
+ filter(body, '/').to_html
61
+ end
62
+
63
+ def test_links_to_more_info_when_info_url_given
64
+ body = "<p>How do I @mention someone?</p>"
65
+ link = "<a href=\"https://github.com/blog/821\" class=\"user-mention\">@mention</a>"
66
+ assert_equal "<p>How do I #{link} someone?</p>",
67
+ filter(body, '/', 'https://github.com/blog/821').to_html
68
+ end
69
+
70
+ MarkdownPipeline =
71
+ HTML::Pipeline.new [
72
+ HTML::Pipeline::MarkdownFilter,
73
+ HTML::Pipeline::MentionFilter
74
+ ]
75
+
76
+ def mentioned_usernames
77
+ result = {}
78
+ MarkdownPipeline.call(@body, {}, result)
79
+ html = result[:output].to_html
80
+ users = html.scan(/user-mention">@(.+?)</)
81
+ users ? users.flatten.uniq : []
82
+ end
83
+
84
+ def test_matches_usernames_in_body
85
+ @body = "@test how are you?"
86
+ assert_equal %w[test], mentioned_usernames
87
+ end
88
+
89
+ def test_matches_usernames_with_dashes
90
+ @body = "hi @some-user"
91
+ assert_equal %w[some-user], mentioned_usernames
92
+ end
93
+
94
+ def test_matches_usernames_followed_by_a_single_dot
95
+ @body = "okay @some-user."
96
+ assert_equal %w[some-user], mentioned_usernames
97
+ end
98
+
99
+ def test_matches_usernames_followed_by_multiple_dots
100
+ @body = "okay @some-user..."
101
+ assert_equal %w[some-user], mentioned_usernames
102
+ end
103
+
104
+ def test_does_not_match_email_addresses
105
+ @body = "aman@tmm1.net"
106
+ assert_equal [], mentioned_usernames
107
+ end
108
+
109
+ def test_does_not_match_domain_name_looking_things
110
+ @body = "we need a @github.com email"
111
+ assert_equal [], mentioned_usernames
112
+ end
113
+
114
+ def test_does_not_match_organization_team_mentions
115
+ @body = "we need to @github/enterprise know"
116
+ assert_equal [], mentioned_usernames
117
+ end
118
+
119
+ def test_matches_colon_suffixed_names
120
+ @body = "@tmm1: what do you think?"
121
+ assert_equal %w[tmm1], mentioned_usernames
122
+ end
123
+
124
+ def test_matches_list_of_names
125
+ @body = "@defunkt @atmos @kneath"
126
+ assert_equal %w[defunkt atmos kneath], mentioned_usernames
127
+ end
128
+
129
+ def test_matches_list_of_names_with_commas
130
+ @body = "/cc @defunkt, @atmos, @kneath"
131
+ assert_equal %w[defunkt atmos kneath], mentioned_usernames
132
+ end
133
+
134
+ def test_matches_inside_brackets
135
+ @body = "(@mislav) and [@rtomayko]"
136
+ assert_equal %w[mislav rtomayko], mentioned_usernames
137
+ end
138
+
139
+ def test_doesnt_ignore_invalid_users
140
+ @body = "@defunkt @mojombo and @somedude"
141
+ assert_equal ['defunkt', 'mojombo', 'somedude'], mentioned_usernames
142
+ end
143
+
144
+ def test_returns_distinct_set
145
+ @body = "/cc @defunkt, @atmos, @kneath, @defunkt, @defunkt"
146
+ assert_equal %w[defunkt atmos kneath], mentioned_usernames
147
+ end
148
+
149
+ def test_does_not_match_inline_code_block_with_multiple_code_blocks
150
+ @body = "something\n\n`/cc @defunkt @atmos @kneath` `/cc @atmos/atmos`"
151
+ assert_equal %w[], mentioned_usernames
152
+ end
153
+
154
+ def test_mention_at_end_of_parenthetical_sentence
155
+ @body = "(We're talking 'bout @ymendel.)"
156
+ assert_equal %w[ymendel], mentioned_usernames
157
+ end
158
+ end
@@ -0,0 +1,22 @@
1
+ require "test_helper"
2
+
3
+ class HTML::Pipeline::PlainTextInputFilterTest < Test::Unit::TestCase
4
+ PlainTextInputFilter = HTML::Pipeline::PlainTextInputFilter
5
+
6
+ def test_fails_when_given_a_documentfragment
7
+ body = "<p>heyo</p>"
8
+ doc = Nokogiri::HTML::DocumentFragment.parse(body)
9
+ assert_raise(TypeError) { PlainTextInputFilter.call(doc, {}) }
10
+ end
11
+
12
+ def test_wraps_input_in_a_div_element
13
+ doc = PlainTextInputFilter.call("howdy pahtner", {})
14
+ assert_equal "<div>howdy pahtner</div>", doc.to_s
15
+ end
16
+
17
+ def test_html_escapes_plain_text_input
18
+ doc = PlainTextInputFilter.call("See: <http://example.org>", {})
19
+ assert_equal "<div>See: &lt;http://example.org&gt;</div>",
20
+ doc.to_s
21
+ end
22
+ end
@@ -0,0 +1,47 @@
1
+ require "test_helper"
2
+
3
+ class HTML::Pipeline::SanitizationFilterTest < Test::Unit::TestCase
4
+ SanitizationFilter = HTML::Pipeline::SanitizationFilter
5
+
6
+ def test_removing_script_tags
7
+ orig = %(<p><img src="http://github.com/img.png" /><script></script></p>)
8
+ html = SanitizationFilter.call(orig).to_s
9
+ assert_no_match /script/, html
10
+ end
11
+
12
+ def test_removing_style_tags
13
+ orig = %(<p><style>hey now</style></p>)
14
+ html = SanitizationFilter.call(orig).to_s
15
+ assert_no_match /style/, html
16
+ end
17
+
18
+ def test_removing_style_attributes
19
+ orig = %(<p style='font-size:1000%'>YO DAWG</p>)
20
+ html = SanitizationFilter.call(orig).to_s
21
+ assert_no_match /font-size/, html
22
+ assert_no_match /style/, html
23
+ end
24
+
25
+ def test_removing_script_event_handler_attributes
26
+ orig = %(<a onclick='javascript:alert(0)'>YO DAWG</a>)
27
+ html = SanitizationFilter.call(orig).to_s
28
+ assert_no_match /javascript/, html
29
+ assert_no_match /onclick/, html
30
+ end
31
+
32
+ def test_sanitizes_li_elements_not_contained_in_ul_or_ol
33
+ stuff = "a\n<li>b</li>\nc"
34
+ html = SanitizationFilter.call(stuff).to_s
35
+ assert_equal "a\n b \nc", html
36
+ end
37
+
38
+ def test_does_not_sanitize_li_elements_contained_in_ul_or_ol
39
+ stuff = "a\n<ul><li>b</li></ul>\nc"
40
+ assert_equal stuff, SanitizationFilter.call(stuff).to_s
41
+ end
42
+
43
+ def test_github_specific_protocols_are_not_removed
44
+ stuff = '<a href="github-windows://spillthelog">Spill this yo</a> and so on'
45
+ assert_equal stuff, SanitizationFilter.call(stuff).to_s
46
+ end
47
+ end
@@ -0,0 +1,47 @@
1
+ require "test_helper"
2
+
3
+ class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase
4
+ TocFilter = HTML::Pipeline::TableOfContentsFilter
5
+
6
+ def test_anchors_are_added_properly
7
+ orig = %(<h1>Ice cube</h1><p>Will swarm on any motherfucker in a blue uniform</p>)
8
+ assert_includes '<a name=', TocFilter.call(orig).to_s
9
+ end
10
+
11
+ def test_anchors_have_sane_names
12
+ orig = %(<h1>Dr Dre</h1><h1>Ice Cube</h1><h1>Eazy-E</h1><h1>MC Ren</h1>)
13
+ result = TocFilter.call(orig).to_s
14
+
15
+ assert_includes '"dr-dre"', result
16
+ assert_includes '"ice-cube"', result
17
+ assert_includes '"eazy-e"', result
18
+ assert_includes '"mc-ren"', result
19
+ end
20
+
21
+ def test_dupe_headers_have_unique_trailing_identifiers
22
+ orig = %(<h1>Straight Outta Compton</h1>
23
+ <h2>Dopeman</h2>
24
+ <h3>Express Yourself</h3>
25
+ <h1>Dopeman</h1>)
26
+
27
+ result = TocFilter.call(orig).to_s
28
+
29
+ assert_includes '"dopeman"', result
30
+ assert_includes '"dopeman-1"', result
31
+ end
32
+
33
+ def test_all_header_tags_are_found_when_adding_anchors
34
+ orig = %(<h1>"Funky President" by James Brown</h1>
35
+ <h2>"It's My Thing" by Marva Whitney</h2>
36
+ <h3>"Boogie Back" by Roy Ayers</h3>
37
+ <h4>"Feel Good" by Fancy</h4>
38
+ <h5>"Funky Drummer" by James Brown</h5>
39
+ <h6>"Ruthless Villain" by Eazy-E</h6>
40
+ <h7>"Be Thankful for What You Got" by William DeVaughn</h7>)
41
+
42
+ doc = TocFilter.call(orig)
43
+ assert_equal 6, doc.search('a').size
44
+ end
45
+ end
46
+
47
+
@@ -0,0 +1,38 @@
1
+ require 'bundler/setup'
2
+ require 'html/pipeline'
3
+ require 'test/unit'
4
+
5
+ require 'active_support/core_ext/object/try'
6
+
7
+ module TestHelpers
8
+ # Asserts that `needle` is not a member of `haystack`, where
9
+ # `haystack` is any object that responds to `include?`.
10
+ def assert_doesnt_include(needle, haystack, message = nil)
11
+ error = '<?> included in <?>'
12
+ message = build_message(message, error, needle.to_s, Array(haystack).map(&:to_s))
13
+
14
+ assert_block message do
15
+ !haystack.include?(needle)
16
+ end
17
+ end
18
+
19
+ # Asserts that `needle` is a member of `haystack`, where
20
+ # `haystack` is any object that responds to `include?`.
21
+ def assert_includes(needle, haystack, message = nil)
22
+ error = '<?> not included in <?>'
23
+ message = build_message(message, error, needle.to_s, Array(haystack).map(&:to_s))
24
+
25
+ assert_block message do
26
+ haystack.include?(needle)
27
+ end
28
+ end
29
+
30
+ # Asserts that two html fragments are equivalent. Attribute order
31
+ # will be ignored.
32
+ def assert_equal_html(expected, actual)
33
+ assert_equal Nokogiri::HTML::DocumentFragment.parse(expected).to_hash,
34
+ Nokogiri::HTML::DocumentFragment.parse(actual).to_hash
35
+ end
36
+ end
37
+
38
+ Test::Unit::TestCase.send(:include, TestHelpers)
metadata ADDED
@@ -0,0 +1,221 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: html-pipeline
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.4
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ryan Tomayko
9
+ - Jerry Cheung
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2012-11-07 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: gemoji
17
+ requirement: !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: 1.1.1
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ~>
29
+ - !ruby/object:Gem::Version
30
+ version: 1.1.1
31
+ - !ruby/object:Gem::Dependency
32
+ name: nokogiri
33
+ requirement: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ~>
37
+ - !ruby/object:Gem::Version
38
+ version: '1.4'
39
+ type: :runtime
40
+ prerelease: false
41
+ version_requirements: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ~>
45
+ - !ruby/object:Gem::Version
46
+ version: '1.4'
47
+ - !ruby/object:Gem::Dependency
48
+ name: github-markdown
49
+ requirement: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.5'
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ~>
61
+ - !ruby/object:Gem::Version
62
+ version: '0.5'
63
+ - !ruby/object:Gem::Dependency
64
+ name: sanitize
65
+ requirement: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ~>
69
+ - !ruby/object:Gem::Version
70
+ version: '2.0'
71
+ type: :runtime
72
+ prerelease: false
73
+ version_requirements: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ~>
77
+ - !ruby/object:Gem::Version
78
+ version: '2.0'
79
+ - !ruby/object:Gem::Dependency
80
+ name: github-linguist
81
+ requirement: !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ~>
85
+ - !ruby/object:Gem::Version
86
+ version: '2.1'
87
+ type: :runtime
88
+ prerelease: false
89
+ version_requirements: !ruby/object:Gem::Requirement
90
+ none: false
91
+ requirements:
92
+ - - ~>
93
+ - !ruby/object:Gem::Version
94
+ version: '2.1'
95
+ - !ruby/object:Gem::Dependency
96
+ name: rinku
97
+ requirement: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ~>
101
+ - !ruby/object:Gem::Version
102
+ version: '1.7'
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ~>
109
+ - !ruby/object:Gem::Version
110
+ version: '1.7'
111
+ - !ruby/object:Gem::Dependency
112
+ name: escape_utils
113
+ requirement: !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ~>
117
+ - !ruby/object:Gem::Version
118
+ version: '0.2'
119
+ type: :runtime
120
+ prerelease: false
121
+ version_requirements: !ruby/object:Gem::Requirement
122
+ none: false
123
+ requirements:
124
+ - - ~>
125
+ - !ruby/object:Gem::Version
126
+ version: '0.2'
127
+ - !ruby/object:Gem::Dependency
128
+ name: activesupport
129
+ requirement: !ruby/object:Gem::Requirement
130
+ none: false
131
+ requirements:
132
+ - - ! '>='
133
+ - !ruby/object:Gem::Version
134
+ version: '2'
135
+ type: :runtime
136
+ prerelease: false
137
+ version_requirements: !ruby/object:Gem::Requirement
138
+ none: false
139
+ requirements:
140
+ - - ! '>='
141
+ - !ruby/object:Gem::Version
142
+ version: '2'
143
+ description: GitHub HTML processing filters and utilities
144
+ email:
145
+ - ryan@github.com
146
+ - jerry@github.com
147
+ executables: []
148
+ extensions: []
149
+ extra_rdoc_files: []
150
+ files:
151
+ - .gitignore
152
+ - .travis.yml
153
+ - Gemfile
154
+ - LICENSE
155
+ - README.md
156
+ - Rakefile
157
+ - html-pipeline.gemspec
158
+ - lib/html/pipeline.rb
159
+ - lib/html/pipeline/@mention_filter.rb
160
+ - lib/html/pipeline/autolink_filter.rb
161
+ - lib/html/pipeline/body_content.rb
162
+ - lib/html/pipeline/camo_filter.rb
163
+ - lib/html/pipeline/email_reply_filter.rb
164
+ - lib/html/pipeline/emoji_filter.rb
165
+ - lib/html/pipeline/filter.rb
166
+ - lib/html/pipeline/https_filter.rb
167
+ - lib/html/pipeline/image_max_width_filter.rb
168
+ - lib/html/pipeline/markdown_filter.rb
169
+ - lib/html/pipeline/plain_text_input_filter.rb
170
+ - lib/html/pipeline/sanitization_filter.rb
171
+ - lib/html/pipeline/syntax_highlight_filter.rb
172
+ - lib/html/pipeline/text_filter.rb
173
+ - lib/html/pipeline/textile_filter.rb
174
+ - lib/html/pipeline/toc_filter.rb
175
+ - lib/html/pipeline/version.rb
176
+ - test/html/pipeline/autolink_filter_test.rb
177
+ - test/html/pipeline/camo_filter_test.rb
178
+ - test/html/pipeline/emoji_filter_test.rb
179
+ - test/html/pipeline/image_max_width_filter_test.rb
180
+ - test/html/pipeline/markdown_filter_test.rb
181
+ - test/html/pipeline/mention_filter_test.rb
182
+ - test/html/pipeline/plain_text_input_filter_test.rb
183
+ - test/html/pipeline/sanitization_filter_test.rb
184
+ - test/html/pipeline/toc_filter_test.rb
185
+ - test/test_helper.rb
186
+ homepage: https://github.com/jch/html-pipeline
187
+ licenses: []
188
+ post_install_message:
189
+ rdoc_options: []
190
+ require_paths:
191
+ - lib
192
+ required_ruby_version: !ruby/object:Gem::Requirement
193
+ none: false
194
+ requirements:
195
+ - - ! '>='
196
+ - !ruby/object:Gem::Version
197
+ version: '0'
198
+ required_rubygems_version: !ruby/object:Gem::Requirement
199
+ none: false
200
+ requirements:
201
+ - - ! '>='
202
+ - !ruby/object:Gem::Version
203
+ version: '0'
204
+ requirements: []
205
+ rubyforge_project:
206
+ rubygems_version: 1.8.23
207
+ signing_key:
208
+ specification_version: 3
209
+ summary: Helpers for processing content through a chain of filters
210
+ test_files:
211
+ - test/html/pipeline/autolink_filter_test.rb
212
+ - test/html/pipeline/camo_filter_test.rb
213
+ - test/html/pipeline/emoji_filter_test.rb
214
+ - test/html/pipeline/image_max_width_filter_test.rb
215
+ - test/html/pipeline/markdown_filter_test.rb
216
+ - test/html/pipeline/mention_filter_test.rb
217
+ - test/html/pipeline/plain_text_input_filter_test.rb
218
+ - test/html/pipeline/sanitization_filter_test.rb
219
+ - test/html/pipeline/toc_filter_test.rb
220
+ - test/test_helper.rb
221
+ has_rdoc: