html-pipeline 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +19 -0
- data/.travis.yml +13 -0
- data/Gemfile +9 -0
- data/LICENSE +22 -0
- data/README.md +128 -0
- data/Rakefile +11 -0
- data/html-pipeline.gemspec +25 -0
- data/lib/html/pipeline.rb +130 -0
- data/lib/html/pipeline/@mention_filter.rb +118 -0
- data/lib/html/pipeline/autolink_filter.rb +22 -0
- data/lib/html/pipeline/body_content.rb +42 -0
- data/lib/html/pipeline/camo_filter.rb +64 -0
- data/lib/html/pipeline/email_reply_filter.rb +56 -0
- data/lib/html/pipeline/emoji_filter.rb +48 -0
- data/lib/html/pipeline/filter.rb +158 -0
- data/lib/html/pipeline/https_filter.rb +13 -0
- data/lib/html/pipeline/image_max_width_filter.rb +37 -0
- data/lib/html/pipeline/markdown_filter.rb +29 -0
- data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
- data/lib/html/pipeline/sanitization_filter.rb +107 -0
- data/lib/html/pipeline/syntax_highlight_filter.rb +29 -0
- data/lib/html/pipeline/text_filter.rb +14 -0
- data/lib/html/pipeline/textile_filter.rb +21 -0
- data/lib/html/pipeline/toc_filter.rb +28 -0
- data/lib/html/pipeline/version.rb +5 -0
- data/test/html/pipeline/autolink_filter_test.rb +22 -0
- data/test/html/pipeline/camo_filter_test.rb +39 -0
- data/test/html/pipeline/emoji_filter_test.rb +16 -0
- data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
- data/test/html/pipeline/markdown_filter_test.rb +101 -0
- data/test/html/pipeline/mention_filter_test.rb +158 -0
- data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
- data/test/html/pipeline/sanitization_filter_test.rb +47 -0
- data/test/html/pipeline/toc_filter_test.rb +47 -0
- data/test/test_helper.rb +38 -0
- metadata +221 -0
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 GitHub Inc. and Jerry Cheung
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# HTML::Pipeline [Build Status](http://travis-ci.org/jch/html-pipeline)
|
2
|
+
|
3
|
+
GitHub HTML processing filters and utilities. This module includes a small
|
4
|
+
framework for defining DOM based content filters and applying them to user
|
5
|
+
provided content.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'html-pipeline'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
```sh
|
18
|
+
$ bundle
|
19
|
+
```
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
```sh
|
24
|
+
$ gem install html-pipeline
|
25
|
+
```
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
This library provides a handful of chainable HTML filters to transform user
|
30
|
+
content into markup. A filter takes an HTML string or
|
31
|
+
`Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then
|
32
|
+
outputs the result.
|
33
|
+
|
34
|
+
For example, to transform Markdown source into Markdown HTML:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
require 'html/pipeline'
|
38
|
+
|
39
|
+
filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!")
|
40
|
+
filter.call
|
41
|
+
```
|
42
|
+
|
43
|
+
Filters can be combined into a pipeline which causes each filter to hand its
|
44
|
+
output to the next filter's input. So if you wanted to have content be
|
45
|
+
filtered through Markdown and be syntax highlighted, you can create the
|
46
|
+
following pipeline:
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
pipeline = HTML::Pipeline.new [
|
50
|
+
HTML::Pipeline::MarkdownFilter,
|
51
|
+
HTML::Pipeline::SyntaxHighlightFilter
|
52
|
+
]
|
53
|
+
result = pipeline.call <<CODE
|
54
|
+
This is *great*:
|
55
|
+
|
56
|
+
some_code(:first)
|
57
|
+
|
58
|
+
CODE
|
59
|
+
result[:output].to_s
|
60
|
+
```
|
61
|
+
|
62
|
+
Prints:
|
63
|
+
|
64
|
+
```html
|
65
|
+
<p>This is <em>great</em>:</p>
|
66
|
+
|
67
|
+
<div class="highlight">
|
68
|
+
<pre><span class="n">some_code</span><span class="p">(</span><span class="ss">:first</span><span class="p">)</span>
|
69
|
+
</pre>
|
70
|
+
</div>
|
71
|
+
```
|
72
|
+
|
73
|
+
Some filters take an optional **context** and/or **result** hash. These are
|
74
|
+
used to pass around arguments and metadata between filters in a pipeline. For
|
75
|
+
example, if you don't want to use GitHub formatted Markdown, you can
|
76
|
+
pass an option in the context hash:
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false)
|
80
|
+
filter.call
|
81
|
+
```
|
82
|
+
|
83
|
+
## Filters
|
84
|
+
|
85
|
+
* `MentionFilter` - replace `@user` mentions with links
|
86
|
+
* `AutolinkFilter` - auto_linking urls in HTML
|
87
|
+
* `CamoFilter` - replace http image urls with [camo-fied](https://github.com/github/camo) https versions
|
88
|
+
* `EmailReplyFilter` - util filter for working with emails
|
89
|
+
* `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
|
90
|
+
* `ImageMaxWidthFilter` - link to full size image for large images
|
91
|
+
* `MarkdownFilter` - convert markdown to html
|
92
|
+
* `PlainTextInputFilter` - html escape text and wrap the result in a div
|
93
|
+
* `SanitizationFilter` - whitelist sanitize user markup
|
94
|
+
* `SyntaxHighlightFilter` - code syntax highlighter with [linguist](https://github.com/github/linguist)
|
95
|
+
* `TextileFilter` - convert textile to html
|
96
|
+
* `TableOfContentsFilter` - anchor headings with name attributes
|
97
|
+
|
98
|
+
## Development Setup
|
99
|
+
|
100
|
+
```sh
|
101
|
+
bundle
|
102
|
+
rake test
|
103
|
+
```
|
104
|
+
|
105
|
+
## Contributing
|
106
|
+
|
107
|
+
1. Fork it
|
108
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
109
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
110
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
111
|
+
5. Create new Pull Request
|
112
|
+
|
113
|
+
|
114
|
+
## TODO
|
115
|
+
|
116
|
+
* test whether emoji filter works on heroku
|
117
|
+
* test whether nokogiri monkey patch is still necessary
|
118
|
+
|
119
|
+
## Contributors
|
120
|
+
|
121
|
+
* [Aman Gupta](mailto:aman@tmm1.net)
|
122
|
+
* [Jake Boxer](mailto:jake@github.com)
|
123
|
+
* [Joshua Peek](mailto:josh@joshpeek.com)
|
124
|
+
* [Kyle Neath](mailto:kneath@gmail.com)
|
125
|
+
* [Rob Sanheim](mailto:rsanheim@gmail.com)
|
126
|
+
* [Simon Rozet](mailto:simon@rozet.name)
|
127
|
+
* [Vicent Martí](mailto:tanoku@gmail.com)
|
128
|
+
* [Risk :danger: Olson](mailto:technoweenie@gmail.com)
|
data/Rakefile
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for html-pipeline. The version constant is loaded from
# lib/html/pipeline/version so it is defined in exactly one place.
require File.expand_path('../lib/html/pipeline/version', __FILE__)

Gem::Specification.new do |gem|
  gem.name          = "html-pipeline"
  gem.version       = HTML::Pipeline::VERSION
  gem.authors       = ["Ryan Tomayko", "Jerry Cheung"]
  gem.email         = ["ryan@github.com", "jerry@github.com"]
  gem.description   = %q{GitHub HTML processing filters and utilities}
  gem.summary       = %q{Helpers for processing content through a chain of filters}
  gem.homepage      = "https://github.com/jch/html-pipeline"
  # Declared to match the MIT LICENSE file shipped with the package.
  gem.license       = "MIT"

  # Package every file tracked by git; test files are the tracked paths
  # beginning with "test".
  gem.files         = `git ls-files`.split $/
  gem.test_files    = gem.files.grep(%r{^test})
  gem.require_paths = ["lib"]

  gem.add_dependency 'gemoji',          '~> 1.1.1'
  gem.add_dependency 'nokogiri',        '~> 1.4'
  gem.add_dependency 'github-markdown', '~> 0.5'
  gem.add_dependency 'sanitize',        '~> 2.0'
  gem.add_dependency 'github-linguist', '~> 2.1'
  gem.add_dependency 'rinku',           '~> 1.7'
  gem.add_dependency 'escape_utils',    '~> 0.2'
  gem.add_dependency 'activesupport',   '>= 2'
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
require "nokogiri"
|
2
|
+
require "active_support/xml_mini/nokogiri" # convert Documents to hashes
|
3
|
+
require "escape_utils"
|
4
|
+
|
5
|
+
module HTML
|
6
|
+
# GitHub HTML processing filters and utilities. This module includes a small
|
7
|
+
# framework for defining DOM based content filters and applying them to user
|
8
|
+
# provided content.
|
9
|
+
#
|
10
|
+
# See HTML::Pipeline::Filter for information on building filters.
|
11
|
+
#
|
12
|
+
# Construct a Pipeline for running multiple HTML filters. A pipeline is created once
|
13
|
+
# with one to many filters, and can then be `call`ed many times over the course
|
14
|
+
# of its lifetime with input.
|
15
|
+
#
|
16
|
+
# filters - Array of Filter objects. Each must respond to call(doc,
|
17
|
+
# context) and return the modified DocumentFragment or a
|
18
|
+
# String containing HTML markup. Filters are performed in the
|
19
|
+
# order provided.
|
20
|
+
# default_context - The default context hash. Values specified here will be merged
|
21
|
+
# into values from each individual pipeline run. Can NOT be
|
22
|
+
# nil. Default: empty Hash.
|
23
|
+
# result_class - The default Class of the result object for individual
|
24
|
+
# calls. Default: Hash. Protip: Pass in a Struct to get
|
25
|
+
# some semblance of type safety.
|
26
|
+
class Pipeline
|
27
|
+
# Lazily load each component the first time its constant is referenced.
# Each path below is a file shipped under lib/html/pipeline/. The former
# `autoload :Pipeline, 'html/pipeline/pipeline'` entry was dropped because no
# such file is shipped; a bare `Pipeline` reference resolved inside this class
# would have triggered the autoload and raised LoadError.
autoload :VERSION,               'html/pipeline/version'
autoload :Filter,                'html/pipeline/filter'
autoload :BodyContent,           'html/pipeline/body_content'
autoload :AutolinkFilter,        'html/pipeline/autolink_filter'
autoload :CamoFilter,            'html/pipeline/camo_filter'
autoload :EmailReplyFilter,      'html/pipeline/email_reply_filter'
autoload :EmojiFilter,           'html/pipeline/emoji_filter'
autoload :HttpsFilter,           'html/pipeline/https_filter'
autoload :ImageMaxWidthFilter,   'html/pipeline/image_max_width_filter'
autoload :MarkdownFilter,        'html/pipeline/markdown_filter'
autoload :MentionFilter,         'html/pipeline/@mention_filter'
autoload :PlainTextInputFilter,  'html/pipeline/plain_text_input_filter'
autoload :SanitizationFilter,    'html/pipeline/sanitization_filter'
autoload :SyntaxHighlightFilter, 'html/pipeline/syntax_highlight_filter'
autoload :TextileFilter,         'html/pipeline/textile_filter'
autoload :TableOfContentsFilter, 'html/pipeline/toc_filter'
autoload :TextFilter,            'html/pipeline/text_filter'
|
45
|
+
|
46
|
+
# Our DOM implementation.
|
47
|
+
DocumentFragment = Nokogiri::HTML::DocumentFragment
|
48
|
+
|
49
|
+
# Parse a String into a DocumentFragment object. When a DocumentFragment is
|
50
|
+
# provided, return it verbatim.
|
51
|
+
def self.parse(document_or_html)
|
52
|
+
# Treat nil (or false) as an empty document so it takes the String branch below.
document_or_html ||= ''
|
53
|
+
if document_or_html.is_a?(String)
|
54
|
+
# Strings are parsed into a DocumentFragment (Nokogiri::HTML::DocumentFragment).
DocumentFragment.parse(document_or_html)
|
55
|
+
else
|
56
|
+
# Any non-String value is returned untouched; it is not checked to be a DocumentFragment.
document_or_html
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# Public: Returns an Array of Filter objects for this Pipeline.
|
61
|
+
attr_reader :filters
|
62
|
+
|
63
|
+
# Public: Build a pipeline from a list of filters.
#
# filters         - Array of filters. Nested arrays are flattened, and the
#                   resulting list is frozen.
# default_context - Hash merged underneath the context given to each call.
#                   Must not be nil. Default: empty Hash.
# result_class    - Class instantiated for a call's result when the caller
#                   does not pass one. Default: Hash.
#
# Raises ArgumentError if default_context is nil.
def initialize(filters, default_context = {}, result_class = nil)
  raise ArgumentError, "default_context cannot be nil" if default_context.nil?
  @filters         = filters.flatten.freeze
  # Freeze a private copy: freezing the argument itself would make the
  # caller's own hash immutable as a side effect of building a pipeline.
  @default_context = default_context.dup.freeze
  @result_class    = result_class || Hash
end
|
69
|
+
|
70
|
+
# Apply all filters in the pipeline to the given HTML.
|
71
|
+
#
|
72
|
+
# html - A String containing HTML or a DocumentFragment object.
|
73
|
+
# context - The context hash passed to each filter. See the Filter docs
|
74
|
+
# for more info on possible values. This object MUST NOT be modified
|
75
|
+
# in place by filters. Use the Result for passing state back.
|
76
|
+
# result - The result Hash passed to each filter for modification. This
|
77
|
+
# is where Filters store extracted information from the content.
|
78
|
+
#
|
79
|
+
# Returns the result Hash after being filtered by this Pipeline. Contains an
|
80
|
+
# :output key with the DocumentFragment or String HTML markup based on the
|
81
|
+
# output of the last filter in the pipeline.
|
82
|
+
def call(html, context = {}, result = nil)
|
83
|
+
# Per-call values win over the pipeline defaults on key collisions (Hash#merge).
context = @default_context.merge(context)
|
84
|
+
# Freeze the merged copy so filters cannot modify the context in place.
context = context.freeze
|
85
|
+
# Reuse the caller's result object when given; otherwise build one from the configured result class.
result ||= @result_class.new
|
86
|
+
# Thread the document through the filters in order: each filter's return value is the
# next filter's input, and the last return value is stored under :output.
result[:output] = @filters.inject(html) { |doc, filter| filter.call(doc, context, result) }
|
87
|
+
result
|
88
|
+
end
|
89
|
+
|
90
|
+
# Like call but guarantee the value returned is a DocumentFragment.
|
91
|
+
# Pipelines may return a DocumentFragment or a String. Callers that need a
|
92
|
+
# DocumentFragment should use this method.
|
93
|
+
def to_document(input, context = {}, result = nil)
|
94
|
+
# Run every filter; the returned result object carries the final output under :output.
result = call(input, context, result)
|
95
|
+
# HTML::Pipeline.parse parses String output and returns any other value as-is.
HTML::Pipeline.parse(result[:output])
|
96
|
+
end
|
97
|
+
|
98
|
+
# Like call but guarantee the value returned is a String of HTML markup.
#
# input   - A String containing HTML or a DocumentFragment object.
# context - The context hash passed to each filter.
# result  - Optional result object for filters to populate. It is forwarded
#           to call, so a caller-supplied object receives the filters'
#           metadata and the :output value.
#
# Returns the pipeline output as a String: output.to_html when the output
# responds to it, otherwise output.to_s.
def to_html(input, context = {}, result = nil)
  # Forward the caller's result object. The earlier `result = nil` in the
  # argument list reset it, so a supplied result object was never populated.
  result = call(input, context, result)
  output = result[:output]
  output.respond_to?(:to_html) ? output.to_html : output.to_s
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
# XXX nokogiri monkey patches
|
112
|
+
class Nokogiri::XML::Node
  # Work around an issue with utf-8 encoded data being erroneously converted
  # to another encoding when replacing text nodes. See 'utf-8 output 2' in
  # user_content_test.rb for details.
  #
  # replacement - The replacement for this node. String-like values (anything
  #               responding to to_str) are first parsed through this node's
  #               own document, wrapped in a <div>, and the <div>'s children
  #               are used as the replacement. Other values pass through.
  #
  # Returns whatever the original replace implementation returns.
  def replace_with_encoding_fix(replacement)
    if replacement.respond_to?(:to_str)
      replacement = document.fragment("<div>#{replacement}</div>").children.first.children
    end
    replace_without_encoding_fix(replacement)
  end

  # Install the alias chain only once. If this file is evaluated a second
  # time, re-aliasing would point replace_without_encoding_fix at the patched
  # method and every replace call would recurse without end.
  unless method_defined?(:replace_without_encoding_fix)
    alias_method :replace_without_encoding_fix, :replace
    alias_method :replace, :replace_with_encoding_fix
  end

  # Replace this node with +replacement+ (through the patched replace above).
  #
  # Returns self, i.e. the node that was replaced, not the replacement.
  def swap(replacement)
    replace(replacement)
    self
  end
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module HTML
|
4
|
+
class Pipeline
|
5
|
+
# HTML filter that replaces @user mentions with links. Mentions within <pre>,
|
6
|
+
# <code>, and <a> elements are ignored. Mentions that reference users that do
|
7
|
+
# not exist are ignored.
|
8
|
+
#
|
9
|
+
# Context options:
|
10
|
+
# :base_url - Used to construct links to user profile pages for each
|
11
|
+
# mention.
|
12
|
+
# :info_url - Used to link to "more info" when someone mentions @mention
|
13
|
+
# or @mentioned.
|
14
|
+
#
|
15
|
+
class MentionFilter < Filter
|
16
|
+
# Public: Find user @mentions in text. See
|
17
|
+
# MentionFilter#mention_link_filter.
|
18
|
+
#
|
19
|
+
# MentionFilter.mentioned_logins_in(text) do |match, login, is_mentioned|
|
20
|
+
# "<a href=...>#{login}</a>"
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# text - String text to search.
|
24
|
+
#
|
25
|
+
# Yields the String match, the String login name, and a Boolean determining
|
26
|
+
# if the match = "@mention[ed]". The yield's return replaces the match in
|
27
|
+
# the original text.
|
28
|
+
#
|
29
|
+
# Returns a String replaced with the return of the block.
|
30
|
+
def self.mentioned_logins_in(text)
  text.gsub(MentionPattern) do |match|
    # Capture group 1 of MentionPattern is the login without the leading "@".
    # Regexp.last_match reads the same match data as the $1 global.
    login = Regexp.last_match(1)
    # MentionLogins holds lowercase names, so compare the downcased login;
    # the login itself is yielded with its original casing.
    yield match, login, MentionLogins.include?(login.downcase)
  end
end
|
36
|
+
|
37
|
+
# Pattern used to extract @mentions from text. Capture group 1 holds the
# login without the leading "@". The leading boundary (start of line or one
# non-word character) is consumed and is therefore part of the overall match;
# the trailing conditions are lookaheads and are not. Matching is
# case-insensitive (/i) and written in extended syntax (/x).
|
38
|
+
MentionPattern = /
|
39
|
+
(?:^|\W) # beginning of string or non-word char
|
40
|
+
@((?>[a-z0-9][a-z0-9-]*)) # @username
|
41
|
+
(?!\/) # without a trailing slash
|
42
|
+
(?=
|
43
|
+
\.+[ \t\W]| # dots followed by space or non-word character
|
44
|
+
\.+$| # dots at end of line
|
45
|
+
[^0-9a-zA-Z_.]| # non-word character except dot
|
46
|
+
$ # end of line
|
47
|
+
)
|
48
|
+
/ix
|
49
|
+
|
50
|
+
# List of username logins that, when mentioned, link to the blog post
|
51
|
+
# about @mentions instead of triggering a real mention.
|
52
|
+
# Entries are lowercase: mentioned_logins_in downcases a login before
# checking it against this list with include?.
MentionLogins = %w(
|
53
|
+
mention
|
54
|
+
mentions
|
55
|
+
mentioned
|
56
|
+
mentioning
|
57
|
+
)
|
58
|
+
|
59
|
+
# Don't look for mentions in text nodes that are children of these elements
|
60
|
+
# Converted to a Set with to_set; #call passes it to has_ancestor? for each text node.
IGNORE_PARENTS = %w(pre code a).to_set
|
61
|
+
|
62
|
+
# Link @mentions in every text node of the document and return the document.
# NOTE(review): doc, has_ancestor? and base_url are not defined in this file;
# presumably they come from the Filter superclass — confirm.
def call
|
63
|
+
doc.search('text()').each do |node|
|
64
|
+
content = node.to_html
|
65
|
+
# Skip text nodes that contain no "@" at all.
next if !content.include?('@')
|
66
|
+
# Skip text nodes nested inside one of the IGNORE_PARENTS elements (pre, code, a).
next if has_ancestor?(node, IGNORE_PARENTS)
|
67
|
+
html = mention_link_filter(content, base_url, info_url)
|
68
|
+
# An unchanged string means nothing was linked, so leave the node alone.
next if html == content
|
69
|
+
# Replace the text node with the markup string containing the links.
node.replace(html)
|
70
|
+
end
|
71
|
+
doc
|
72
|
+
end
|
73
|
+
|
74
|
+
# The URL to provide when someone @mentions a "mention" name, such as
|
75
|
+
# @mention or @mentioned, that will give them more info on mentions.
|
76
|
+
def info_url
|
77
|
+
# `|| nil` turns a false (or missing) :info_url into nil, which is the value
# link_to_mention_info tests for with nil? before building a link.
context[:info_url] || nil
|
78
|
+
end
|
79
|
+
|
80
|
+
# Replace user @mentions in text with links to the mentioned user's
|
81
|
+
# profile page.
|
82
|
+
#
|
83
|
+
# text - String text to replace @mention usernames in.
|
84
|
+
# base_url - The base URL used to construct user profile URLs.
|
85
|
+
# info_url - The "more info" URL used to link to more info on @mentions.
|
86
|
+
# If nil we don't link @mention or @mentioned.
|
87
|
+
#
|
88
|
+
# Returns a string with @mentions replaced with links. All links have a
|
89
|
+
# 'user-mention' class name attached for styling.
|
90
|
+
# NOTE(review): the base_url parameter is accepted but never forwarded:
# link_to_mentioned_user is called with the login only and reads base_url on
# its own, so a value passed here has no effect on the generated links.
def mention_link_filter(text, base_url='/', info_url=nil)
|
91
|
+
self.class.mentioned_logins_in(text) do |match, login, is_mentioned|
|
92
|
+
link =
|
93
|
+
if is_mentioned
|
94
|
+
# Reserved names from MentionLogins link to the "more info" URL (or stay plain text).
link_to_mention_info(login, info_url)
|
95
|
+
else
|
96
|
+
link_to_mentioned_user(login)
|
97
|
+
end
|
98
|
+
|
99
|
+
# match can include the boundary character consumed before the "@", so only
# the "@login" part is substituted. Both helpers return Strings, so the
# else branch of this ternary is not reached with the helpers in this file.
link ? match.sub("@#{login}", link) : match
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
# Build the link for a reserved mention name such as @mention.
#
# text     - The login text without the leading "@".
# info_url - The "more info" URL, or nil.
#
# Returns the plain "@text" String when info_url is nil, otherwise an anchor
# tag String with the 'user-mention' class pointing at info_url.
# NOTE(review): info_url is interpolated into the attribute without escaping.
def link_to_mention_info(text, info_url=nil)
|
104
|
+
return "@#{text}" if info_url.nil?
|
105
|
+
"<a href='#{info_url}' class='user-mention'>" +
|
106
|
+
"@#{text}" +
|
107
|
+
"</a>"
|
108
|
+
end
|
109
|
+
|
110
|
+
# Build the profile link for a mentioned user.
#
# login - The mentioned login without the leading "@".
#
# Returns an anchor tag String with the 'user-mention' class whose href is
# base_url joined with the login.
# NOTE(review): base_url is not a parameter here; presumably it is an
# accessor provided by the Filter superclass — confirm.
def link_to_mentioned_user(login)
|
111
|
+
# File.join is used to join the two URL parts with a "/" separator.
url = File.join(base_url, login)
|
112
|
+
"<a href='#{url}' class='user-mention'>" +
|
113
|
+
"@#{login}" +
|
114
|
+
"</a>"
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|