html-pipeline 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. data/.gitignore +19 -0
  2. data/.travis.yml +13 -0
  3. data/Gemfile +9 -0
  4. data/LICENSE +22 -0
  5. data/README.md +128 -0
  6. data/Rakefile +11 -0
  7. data/html-pipeline.gemspec +25 -0
  8. data/lib/html/pipeline.rb +130 -0
  9. data/lib/html/pipeline/@mention_filter.rb +118 -0
  10. data/lib/html/pipeline/autolink_filter.rb +22 -0
  11. data/lib/html/pipeline/body_content.rb +42 -0
  12. data/lib/html/pipeline/camo_filter.rb +64 -0
  13. data/lib/html/pipeline/email_reply_filter.rb +56 -0
  14. data/lib/html/pipeline/emoji_filter.rb +48 -0
  15. data/lib/html/pipeline/filter.rb +158 -0
  16. data/lib/html/pipeline/https_filter.rb +13 -0
  17. data/lib/html/pipeline/image_max_width_filter.rb +37 -0
  18. data/lib/html/pipeline/markdown_filter.rb +29 -0
  19. data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
  20. data/lib/html/pipeline/sanitization_filter.rb +107 -0
  21. data/lib/html/pipeline/syntax_highlight_filter.rb +29 -0
  22. data/lib/html/pipeline/text_filter.rb +14 -0
  23. data/lib/html/pipeline/textile_filter.rb +21 -0
  24. data/lib/html/pipeline/toc_filter.rb +28 -0
  25. data/lib/html/pipeline/version.rb +5 -0
  26. data/test/html/pipeline/autolink_filter_test.rb +22 -0
  27. data/test/html/pipeline/camo_filter_test.rb +39 -0
  28. data/test/html/pipeline/emoji_filter_test.rb +16 -0
  29. data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
  30. data/test/html/pipeline/markdown_filter_test.rb +101 -0
  31. data/test/html/pipeline/mention_filter_test.rb +158 -0
  32. data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
  33. data/test/html/pipeline/sanitization_filter_test.rb +47 -0
  34. data/test/html/pipeline/toc_filter_test.rb +47 -0
  35. data/test/test_helper.rb +38 -0
  36. metadata +221 -0
@@ -0,0 +1,158 @@
1
+ require "test_helper"
2
+
3
+ class HTML::Pipeline::MentionFilterTest < Test::Unit::TestCase
4
+ def filter(html, base_url='/', info_url=nil)
5
+ HTML::Pipeline::MentionFilter.call(html, :base_url => base_url, :info_url => info_url)
6
+ end
7
+
8
+ def test_filtering_a_documentfragment
9
+ body = "<p>@kneath: check it out.</p>"
10
+ doc = Nokogiri::HTML::DocumentFragment.parse(body)
11
+
12
+ res = filter(doc, '/')
13
+ assert_same doc, res
14
+
15
+ link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
16
+ assert_equal "<p>#{link}: check it out.</p>",
17
+ res.to_html
18
+ end
19
+
20
+ def test_filtering_plain_text
21
+ body = "<p>@kneath: check it out.</p>"
22
+ res = filter(body, '/')
23
+
24
+ link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
25
+ assert_equal "<p>#{link}: check it out.</p>",
26
+ res.to_html
27
+ end
28
+
29
+ def test_not_replacing_mentions_in_pre_tags
30
+ body = "<pre>@kneath: okay</pre>"
31
+ assert_equal body, filter(body).to_html
32
+ end
33
+
34
+ def test_not_replacing_mentions_in_code_tags
35
+ body = "<p><code>@kneath:</code> okay</p>"
36
+ assert_equal body, filter(body).to_html
37
+ end
38
+
39
+ def test_not_replacing_mentions_in_links
40
+ body = "<p><a>@kneath</a> okay</p>"
41
+ assert_equal body, filter(body).to_html
42
+ end
43
+
44
+ def test_entity_encoding_and_whatnot
45
+ body = "<p>@&#x6b;neath what's up</p>"
46
+ link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
47
+ assert_equal "<p>#{link} what's up</p>", filter(body, '/').to_html
48
+ end
49
+
50
+ def test_html_injection
51
+ body = "<p>@kneath &lt;script>alert(0)&lt;/script></p>"
52
+ link = "<a href=\"/kneath\" class=\"user-mention\">@kneath</a>"
53
+ assert_equal "<p>#{link} &lt;script&gt;alert(0)&lt;/script&gt;</p>",
54
+ filter(body, '/').to_html
55
+ end
56
+
57
+ def test_links_to_nothing_when_no_info_url_given
58
+ body = "<p>How do I @mention someone?</p>"
59
+ assert_equal "<p>How do I @mention someone?</p>",
60
+ filter(body, '/').to_html
61
+ end
62
+
63
+ def test_links_to_more_info_when_info_url_given
64
+ body = "<p>How do I @mention someone?</p>"
65
+ link = "<a href=\"https://github.com/blog/821\" class=\"user-mention\">@mention</a>"
66
+ assert_equal "<p>How do I #{link} someone?</p>",
67
+ filter(body, '/', 'https://github.com/blog/821').to_html
68
+ end
69
+
70
+ MarkdownPipeline =
71
+ HTML::Pipeline.new [
72
+ HTML::Pipeline::MarkdownFilter,
73
+ HTML::Pipeline::MentionFilter
74
+ ]
75
+
76
# Runs @body through MarkdownPipeline and returns the distinct usernames
# that were rendered as mention links, in order of first appearance.
#
# Usernames are recovered by scanning the rendered HTML for the text that
# follows `user-mention">@` up to the next `<`.
#
# Returns an Array of Strings; empty when nothing was linked.
def mentioned_usernames
  result = {}
  MarkdownPipeline.call(@body, {}, result)
  html = result[:output].to_html
  # String#scan always returns an Array (empty when nothing matches), so no
  # nil guard is needed before flattening the capture groups.
  html.scan(/user-mention">@(.+?)</).flatten.uniq
end
83
+
84
+ def test_matches_usernames_in_body
85
+ @body = "@test how are you?"
86
+ assert_equal %w[test], mentioned_usernames
87
+ end
88
+
89
+ def test_matches_usernames_with_dashes
90
+ @body = "hi @some-user"
91
+ assert_equal %w[some-user], mentioned_usernames
92
+ end
93
+
94
+ def test_matches_usernames_followed_by_a_single_dot
95
+ @body = "okay @some-user."
96
+ assert_equal %w[some-user], mentioned_usernames
97
+ end
98
+
99
+ def test_matches_usernames_followed_by_multiple_dots
100
+ @body = "okay @some-user..."
101
+ assert_equal %w[some-user], mentioned_usernames
102
+ end
103
+
104
+ def test_does_not_match_email_addresses
105
+ @body = "aman@tmm1.net"
106
+ assert_equal [], mentioned_usernames
107
+ end
108
+
109
+ def test_does_not_match_domain_name_looking_things
110
+ @body = "we need a @github.com email"
111
+ assert_equal [], mentioned_usernames
112
+ end
113
+
114
+ def test_does_not_match_organization_team_mentions
115
+ @body = "we need to @github/enterprise know"
116
+ assert_equal [], mentioned_usernames
117
+ end
118
+
119
+ def test_matches_colon_suffixed_names
120
+ @body = "@tmm1: what do you think?"
121
+ assert_equal %w[tmm1], mentioned_usernames
122
+ end
123
+
124
+ def test_matches_list_of_names
125
+ @body = "@defunkt @atmos @kneath"
126
+ assert_equal %w[defunkt atmos kneath], mentioned_usernames
127
+ end
128
+
129
+ def test_matches_list_of_names_with_commas
130
+ @body = "/cc @defunkt, @atmos, @kneath"
131
+ assert_equal %w[defunkt atmos kneath], mentioned_usernames
132
+ end
133
+
134
+ def test_matches_inside_brackets
135
+ @body = "(@mislav) and [@rtomayko]"
136
+ assert_equal %w[mislav rtomayko], mentioned_usernames
137
+ end
138
+
139
+ def test_doesnt_ignore_invalid_users
140
+ @body = "@defunkt @mojombo and @somedude"
141
+ assert_equal ['defunkt', 'mojombo', 'somedude'], mentioned_usernames
142
+ end
143
+
144
+ def test_returns_distinct_set
145
+ @body = "/cc @defunkt, @atmos, @kneath, @defunkt, @defunkt"
146
+ assert_equal %w[defunkt atmos kneath], mentioned_usernames
147
+ end
148
+
149
+ def test_does_not_match_inline_code_block_with_multiple_code_blocks
150
+ @body = "something\n\n`/cc @defunkt @atmos @kneath` `/cc @atmos/atmos`"
151
+ assert_equal %w[], mentioned_usernames
152
+ end
153
+
154
+ def test_mention_at_end_of_parenthetical_sentence
155
+ @body = "(We're talking 'bout @ymendel.)"
156
+ assert_equal %w[ymendel], mentioned_usernames
157
+ end
158
+ end
@@ -0,0 +1,22 @@
1
+ require "test_helper"
2
+
3
+ class HTML::Pipeline::PlainTextInputFilterTest < Test::Unit::TestCase
4
+ PlainTextInputFilter = HTML::Pipeline::PlainTextInputFilter
5
+
6
+ def test_fails_when_given_a_documentfragment
7
+ body = "<p>heyo</p>"
8
+ doc = Nokogiri::HTML::DocumentFragment.parse(body)
9
+ assert_raise(TypeError) { PlainTextInputFilter.call(doc, {}) }
10
+ end
11
+
12
+ def test_wraps_input_in_a_div_element
13
+ doc = PlainTextInputFilter.call("howdy pahtner", {})
14
+ assert_equal "<div>howdy pahtner</div>", doc.to_s
15
+ end
16
+
17
+ def test_html_escapes_plain_text_input
18
+ doc = PlainTextInputFilter.call("See: <http://example.org>", {})
19
+ assert_equal "<div>See: &lt;http://example.org&gt;</div>",
20
+ doc.to_s
21
+ end
22
+ end
@@ -0,0 +1,47 @@
1
+ require "test_helper"
2
+
3
+ class HTML::Pipeline::SanitizationFilterTest < Test::Unit::TestCase
4
+ SanitizationFilter = HTML::Pipeline::SanitizationFilter
5
+
6
+ def test_removing_script_tags
7
+ orig = %(<p><img src="http://github.com/img.png" /><script></script></p>)
8
+ html = SanitizationFilter.call(orig).to_s
9
+ assert_no_match /script/, html
10
+ end
11
+
12
+ def test_removing_style_tags
13
+ orig = %(<p><style>hey now</style></p>)
14
+ html = SanitizationFilter.call(orig).to_s
15
+ assert_no_match /style/, html
16
+ end
17
+
18
+ def test_removing_style_attributes
19
+ orig = %(<p style='font-size:1000%'>YO DAWG</p>)
20
+ html = SanitizationFilter.call(orig).to_s
21
+ assert_no_match /font-size/, html
22
+ assert_no_match /style/, html
23
+ end
24
+
25
+ def test_removing_script_event_handler_attributes
26
+ orig = %(<a onclick='javascript:alert(0)'>YO DAWG</a>)
27
+ html = SanitizationFilter.call(orig).to_s
28
+ assert_no_match /javscript/, html
29
+ assert_no_match /onclick/, html
30
+ end
31
+
32
+ def test_sanitizes_li_elements_not_contained_in_ul_or_ol
33
+ stuff = "a\n<li>b</li>\nc"
34
+ html = SanitizationFilter.call(stuff).to_s
35
+ assert_equal "a\n b \nc", html
36
+ end
37
+
38
+ def test_does_not_sanitize_li_elements_contained_in_ul_or_ol
39
+ stuff = "a\n<ul><li>b</li></ul>\nc"
40
+ assert_equal stuff, SanitizationFilter.call(stuff).to_s
41
+ end
42
+
43
+ def test_github_specific_protocols_are_not_removed
44
+ stuff = '<a href="github-windows://spillthelog">Spill this yo</a> and so on'
45
+ assert_equal stuff, SanitizationFilter.call(stuff).to_s
46
+ end
47
+ end
@@ -0,0 +1,47 @@
1
+ require "test_helper"
2
+
3
+ class HTML::Pipeline::TableOfContentsFilterTest < Test::Unit::TestCase
4
+ TocFilter = HTML::Pipeline::TableOfContentsFilter
5
+
6
+ def test_anchors_are_added_properly
7
+ orig = %(<h1>Ice cube</h1><p>Will swarm on any motherfucker in a blue uniform</p>)
8
+ assert_includes '<a name=', TocFilter.call(orig).to_s
9
+ end
10
+
11
+ def test_anchors_have_sane_names
12
+ orig = %(<h1>Dr Dre</h1><h1>Ice Cube</h1><h1>Eazy-E</h1><h1>MC Ren</h1>)
13
+ result = TocFilter.call(orig).to_s
14
+
15
+ assert_includes '"dr-dre"', result
16
+ assert_includes '"ice-cube"', result
17
+ assert_includes '"eazy-e"', result
18
+ assert_includes '"mc-ren"', result
19
+ end
20
+
21
+ def test_dupe_headers_have_unique_trailing_identifiers
22
+ orig = %(<h1>Straight Outta Compton</h1>
23
+ <h2>Dopeman</h2>
24
+ <h3>Express Yourself</h3>
25
+ <h1>Dopeman</h1>)
26
+
27
+ result = TocFilter.call(orig).to_s
28
+
29
+ assert_includes '"dopeman"', result
30
+ assert_includes '"dopeman-1"', result
31
+ end
32
+
33
+ def test_all_header_tags_are_found_when_adding_anchors
34
+ orig = %(<h1>"Funky President" by James Brown</h1>
35
+ <h2>"It's My Thing" by Marva Whitney</h2>
36
+ <h3>"Boogie Back" by Roy Ayers</h3>
37
+ <h4>"Feel Good" by Fancy</h4>
38
+ <h5>"Funky Drummer" by James Brown</h5>
39
+ <h6>"Ruthless Villain" by Eazy-E</h6>
40
+ <h7>"Be Thankful for What You Got" by William DeVaughn</h7>)
41
+
42
+ doc = TocFilter.call(orig)
43
+ assert_equal 6, doc.search('a').size
44
+ end
45
+ end
46
+
47
+
@@ -0,0 +1,38 @@
1
+ require 'bundler/setup'
2
+ require 'html/pipeline'
3
+ require 'test/unit'
4
+
5
+ require 'active_support/core_ext/object/try'
6
+
7
# Shared assertion helpers for the test suite.
module TestHelpers
  # Asserts that `needle` is not a member of `haystack`, where
  # `haystack` is any object that responds to `include?`.
  #
  # needle   - the object expected to be absent.
  # haystack - the object searched via `include?`.
  # message  - optional head passed through to build_message.
  def assert_doesnt_include(needle, haystack, message = nil)
    message = inclusion_message(message, '<?> included in <?>', needle, haystack)
    assert_block(message) { !haystack.include?(needle) }
  end

  # Asserts that `needle` is a member of `haystack`, where
  # `haystack` is any object that responds to `include?`.
  #
  # needle   - the object expected to be present.
  # haystack - the object searched via `include?`.
  # message  - optional head passed through to build_message.
  def assert_includes(needle, haystack, message = nil)
    message = inclusion_message(message, '<?> not included in <?>', needle, haystack)
    assert_block(message) { haystack.include?(needle) }
  end

  # Asserts that two html fragments are equivalent. Attribute order
  # will be ignored.
  def assert_equal_html(expected, actual)
    assert_equal Nokogiri::HTML::DocumentFragment.parse(expected).to_hash,
                 Nokogiri::HTML::DocumentFragment.parse(actual).to_hash
  end

  private

  # Builds the failure message shared by the inclusion assertions: the
  # needle and every haystack member are stringified before being handed
  # to build_message together with the `<?>` template.
  def inclusion_message(message, template, needle, haystack)
    build_message(message, template, needle.to_s, Array(haystack).map(&:to_s))
  end
end
37
+
38
+ Test::Unit::TestCase.send(:include, TestHelpers)
metadata ADDED
@@ -0,0 +1,221 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: html-pipeline
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.4
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Ryan Tomayko
9
+ - Jerry Cheung
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2012-11-07 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: gemoji
17
+ requirement: !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: 1.1.1
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ none: false
27
+ requirements:
28
+ - - ~>
29
+ - !ruby/object:Gem::Version
30
+ version: 1.1.1
31
+ - !ruby/object:Gem::Dependency
32
+ name: nokogiri
33
+ requirement: !ruby/object:Gem::Requirement
34
+ none: false
35
+ requirements:
36
+ - - ~>
37
+ - !ruby/object:Gem::Version
38
+ version: '1.4'
39
+ type: :runtime
40
+ prerelease: false
41
+ version_requirements: !ruby/object:Gem::Requirement
42
+ none: false
43
+ requirements:
44
+ - - ~>
45
+ - !ruby/object:Gem::Version
46
+ version: '1.4'
47
+ - !ruby/object:Gem::Dependency
48
+ name: github-markdown
49
+ requirement: !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '0.5'
55
+ type: :runtime
56
+ prerelease: false
57
+ version_requirements: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ~>
61
+ - !ruby/object:Gem::Version
62
+ version: '0.5'
63
+ - !ruby/object:Gem::Dependency
64
+ name: sanitize
65
+ requirement: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ~>
69
+ - !ruby/object:Gem::Version
70
+ version: '2.0'
71
+ type: :runtime
72
+ prerelease: false
73
+ version_requirements: !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ~>
77
+ - !ruby/object:Gem::Version
78
+ version: '2.0'
79
+ - !ruby/object:Gem::Dependency
80
+ name: github-linguist
81
+ requirement: !ruby/object:Gem::Requirement
82
+ none: false
83
+ requirements:
84
+ - - ~>
85
+ - !ruby/object:Gem::Version
86
+ version: '2.1'
87
+ type: :runtime
88
+ prerelease: false
89
+ version_requirements: !ruby/object:Gem::Requirement
90
+ none: false
91
+ requirements:
92
+ - - ~>
93
+ - !ruby/object:Gem::Version
94
+ version: '2.1'
95
+ - !ruby/object:Gem::Dependency
96
+ name: rinku
97
+ requirement: !ruby/object:Gem::Requirement
98
+ none: false
99
+ requirements:
100
+ - - ~>
101
+ - !ruby/object:Gem::Version
102
+ version: '1.7'
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ~>
109
+ - !ruby/object:Gem::Version
110
+ version: '1.7'
111
+ - !ruby/object:Gem::Dependency
112
+ name: escape_utils
113
+ requirement: !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ~>
117
+ - !ruby/object:Gem::Version
118
+ version: '0.2'
119
+ type: :runtime
120
+ prerelease: false
121
+ version_requirements: !ruby/object:Gem::Requirement
122
+ none: false
123
+ requirements:
124
+ - - ~>
125
+ - !ruby/object:Gem::Version
126
+ version: '0.2'
127
+ - !ruby/object:Gem::Dependency
128
+ name: activesupport
129
+ requirement: !ruby/object:Gem::Requirement
130
+ none: false
131
+ requirements:
132
+ - - ! '>='
133
+ - !ruby/object:Gem::Version
134
+ version: '2'
135
+ type: :runtime
136
+ prerelease: false
137
+ version_requirements: !ruby/object:Gem::Requirement
138
+ none: false
139
+ requirements:
140
+ - - ! '>='
141
+ - !ruby/object:Gem::Version
142
+ version: '2'
143
+ description: GitHub HTML processing filters and utilities
144
+ email:
145
+ - ryan@github.com
146
+ - jerry@github.com
147
+ executables: []
148
+ extensions: []
149
+ extra_rdoc_files: []
150
+ files:
151
+ - .gitignore
152
+ - .travis.yml
153
+ - Gemfile
154
+ - LICENSE
155
+ - README.md
156
+ - Rakefile
157
+ - html-pipeline.gemspec
158
+ - lib/html/pipeline.rb
159
+ - lib/html/pipeline/@mention_filter.rb
160
+ - lib/html/pipeline/autolink_filter.rb
161
+ - lib/html/pipeline/body_content.rb
162
+ - lib/html/pipeline/camo_filter.rb
163
+ - lib/html/pipeline/email_reply_filter.rb
164
+ - lib/html/pipeline/emoji_filter.rb
165
+ - lib/html/pipeline/filter.rb
166
+ - lib/html/pipeline/https_filter.rb
167
+ - lib/html/pipeline/image_max_width_filter.rb
168
+ - lib/html/pipeline/markdown_filter.rb
169
+ - lib/html/pipeline/plain_text_input_filter.rb
170
+ - lib/html/pipeline/sanitization_filter.rb
171
+ - lib/html/pipeline/syntax_highlight_filter.rb
172
+ - lib/html/pipeline/text_filter.rb
173
+ - lib/html/pipeline/textile_filter.rb
174
+ - lib/html/pipeline/toc_filter.rb
175
+ - lib/html/pipeline/version.rb
176
+ - test/html/pipeline/autolink_filter_test.rb
177
+ - test/html/pipeline/camo_filter_test.rb
178
+ - test/html/pipeline/emoji_filter_test.rb
179
+ - test/html/pipeline/image_max_width_filter_test.rb
180
+ - test/html/pipeline/markdown_filter_test.rb
181
+ - test/html/pipeline/mention_filter_test.rb
182
+ - test/html/pipeline/plain_text_input_filter_test.rb
183
+ - test/html/pipeline/sanitization_filter_test.rb
184
+ - test/html/pipeline/toc_filter_test.rb
185
+ - test/test_helper.rb
186
+ homepage: https://github.com/jch/html-pipeline
187
+ licenses: []
188
+ post_install_message:
189
+ rdoc_options: []
190
+ require_paths:
191
+ - lib
192
+ required_ruby_version: !ruby/object:Gem::Requirement
193
+ none: false
194
+ requirements:
195
+ - - ! '>='
196
+ - !ruby/object:Gem::Version
197
+ version: '0'
198
+ required_rubygems_version: !ruby/object:Gem::Requirement
199
+ none: false
200
+ requirements:
201
+ - - ! '>='
202
+ - !ruby/object:Gem::Version
203
+ version: '0'
204
+ requirements: []
205
+ rubyforge_project:
206
+ rubygems_version: 1.8.23
207
+ signing_key:
208
+ specification_version: 3
209
+ summary: Helpers for processing content through a chain of filters
210
+ test_files:
211
+ - test/html/pipeline/autolink_filter_test.rb
212
+ - test/html/pipeline/camo_filter_test.rb
213
+ - test/html/pipeline/emoji_filter_test.rb
214
+ - test/html/pipeline/image_max_width_filter_test.rb
215
+ - test/html/pipeline/markdown_filter_test.rb
216
+ - test/html/pipeline/mention_filter_test.rb
217
+ - test/html/pipeline/plain_text_input_filter_test.rb
218
+ - test/html/pipeline/sanitization_filter_test.rb
219
+ - test/html/pipeline/toc_filter_test.rb
220
+ - test/test_helper.rb
221
+ has_rdoc: