html-pipeline 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. data/.gitignore +19 -0
  2. data/.travis.yml +13 -0
  3. data/Gemfile +9 -0
  4. data/LICENSE +22 -0
  5. data/README.md +128 -0
  6. data/Rakefile +11 -0
  7. data/html-pipeline.gemspec +25 -0
  8. data/lib/html/pipeline.rb +130 -0
  9. data/lib/html/pipeline/@mention_filter.rb +118 -0
  10. data/lib/html/pipeline/autolink_filter.rb +22 -0
  11. data/lib/html/pipeline/body_content.rb +42 -0
  12. data/lib/html/pipeline/camo_filter.rb +64 -0
  13. data/lib/html/pipeline/email_reply_filter.rb +56 -0
  14. data/lib/html/pipeline/emoji_filter.rb +48 -0
  15. data/lib/html/pipeline/filter.rb +158 -0
  16. data/lib/html/pipeline/https_filter.rb +13 -0
  17. data/lib/html/pipeline/image_max_width_filter.rb +37 -0
  18. data/lib/html/pipeline/markdown_filter.rb +29 -0
  19. data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
  20. data/lib/html/pipeline/sanitization_filter.rb +107 -0
  21. data/lib/html/pipeline/syntax_highlight_filter.rb +29 -0
  22. data/lib/html/pipeline/text_filter.rb +14 -0
  23. data/lib/html/pipeline/textile_filter.rb +21 -0
  24. data/lib/html/pipeline/toc_filter.rb +28 -0
  25. data/lib/html/pipeline/version.rb +5 -0
  26. data/test/html/pipeline/autolink_filter_test.rb +22 -0
  27. data/test/html/pipeline/camo_filter_test.rb +39 -0
  28. data/test/html/pipeline/emoji_filter_test.rb +16 -0
  29. data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
  30. data/test/html/pipeline/markdown_filter_test.rb +101 -0
  31. data/test/html/pipeline/mention_filter_test.rb +158 -0
  32. data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
  33. data/test/html/pipeline/sanitization_filter_test.rb +47 -0
  34. data/test/html/pipeline/toc_filter_test.rb +47 -0
  35. data/test/test_helper.rb +38 -0
  36. metadata +221 -0
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ bin/*
19
+ vendor/gems
data/.travis.yml ADDED
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+
3
+ before_install:
4
+ - sudo apt-get update -qq
5
+ - sudo apt-get install -qq libicu-dev
6
+
7
+ script: "bundle exec rake"
8
+
9
+ rvm:
10
+ - 1.8.7
11
+ - 1.9.2
12
+ - 1.9.3
13
+ - ree
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in html-pipeline.gemspec
4
+ gemspec
5
+
6
+ group :development do
7
+ gem 'bundler'
8
+ gem 'rake'
9
+ end
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 GitHub Inc. and Jerry Cheung
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,128 @@
1
+ # HTML::Pipeline [![Build Status](https://secure.travis-ci.org/jch/html-pipeline.png)](http://travis-ci.org/jch/html-pipeline)
2
+
3
+ GitHub HTML processing filters and utilities. This module includes a small
4
+ framework for defining DOM based content filters and applying them to user
5
+ provided content.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'html-pipeline'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ ```sh
18
+ $ bundle
19
+ ```
20
+
21
+ Or install it yourself as:
22
+
23
+ ```sh
24
+ $ gem install html-pipeline
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ This library provides a handful of chainable HTML filters to transform user
30
+ content into markup. A filter takes an HTML string or
31
+ `Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then
32
+ outputs the result.
33
+
34
+ For example, to transform Markdown source into Markdown HTML:
35
+
36
+ ```ruby
37
+ require 'html/pipeline'
38
+
39
+ filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!")
40
+ filter.call
41
+ ```
42
+
43
+ Filters can be combined into a pipeline which causes each filter to hand its
44
+ output to the next filter's input. So if you wanted to have content be
45
+ filtered through Markdown and be syntax highlighted, you can create the
46
+ following pipeline:
47
+
48
+ ```ruby
49
+ pipeline = HTML::Pipeline.new [
50
+ HTML::Pipeline::MarkdownFilter,
51
+ HTML::Pipeline::SyntaxHighlightFilter
52
+ ]
53
+ result = pipeline.call <<CODE
54
+ This is *great*:
55
+
56
+ some_code(:first)
57
+
58
+ CODE
59
+ result[:output].to_s
60
+ ```
61
+
62
+ Prints:
63
+
64
+ ```html
65
+ <p>This is <em>great</em>:</p>
66
+
67
+ <div class="highlight">
68
+ <pre><span class="n">some_code</span><span class="p">(</span><span class="ss">:first</span><span class="p">)</span>
69
+ </pre>
70
+ </div>
71
+ ```
72
+
73
+ Some filters take an optional **context** and/or **result** hash. These are
74
+ used to pass around arguments and metadata between filters in a pipeline. For
75
+ example, if you don't want to use GitHub formatted Markdown, you can
76
+ pass an option in the context hash:
77
+
78
+ ```ruby
79
+ filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false)
80
+ filter.call
81
+ ```
82
+
83
+ ## Filters
84
+
85
+ * `MentionFilter` - replace `@user` mentions with links
86
+ * `AutolinkFilter` - auto_linking urls in HTML
87
+ * `CamoFilter` - replace http image urls with [camo-fied](https://github.com/github/camo) https versions
88
+ * `EmailReplyFilter` - util filter for working with emails
89
+ * `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
90
+ * `ImageMaxWidthFilter` - link to full size image for large images
91
+ * `MarkdownFilter` - convert markdown to html
92
+ * `PlainTextInputFilter` - html escape text and wrap the result in a div
93
+ * `SanitizationFilter` - whitelist sanitize user markup
94
+ * `SyntaxHighlightFilter` - code syntax highlighter with [linguist](https://github.com/github/linguist)
95
+ * `TextileFilter` - convert textile to html
96
+ * `TableOfContentsFilter` - anchor headings with name attributes
97
+
98
+ ## Development Setup
99
+
100
+ ```sh
101
+ bundle
102
+ rake test
103
+ ```
104
+
105
+ ## Contributing
106
+
107
+ 1. Fork it
108
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
109
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
110
+ 4. Push to the branch (`git push origin my-new-feature`)
111
+ 5. Create new Pull Request
112
+
113
+
114
+ ## TODO
115
+
116
+ * test whether emoji filter works on heroku
117
+ * test whether nokogiri monkey patch is still necessary
118
+
119
+ ## Contributors
120
+
121
+ * [Aman Gupta](mailto:aman@tmm1.net)
122
+ * [Jake Boxer](mailto:jake@github.com)
123
+ * [Joshua Peek](mailto:josh@joshpeek.com)
124
+ * [Kyle Neath](mailto:kneath@gmail.com)
125
+ * [Rob Sanheim](mailto:rsanheim@gmail.com)
126
+ * [Simon Rozet](mailto:simon@rozet.name)
127
+ * [Vicent Martí](mailto:tanoku@gmail.com)
128
+ * [Risk :danger: Olson](mailto:technoweenie@gmail.com)
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << "test"
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ t.verbose = true
9
+ end
10
+
11
+ task :default => :test
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/html/pipeline/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.name = "html-pipeline"
6
+ gem.version = HTML::Pipeline::VERSION
7
+ gem.authors = ["Ryan Tomayko", "Jerry Cheung"]
8
+ gem.email = ["ryan@github.com", "jerry@github.com"]
9
+ gem.description = %q{GitHub HTML processing filters and utilities}
10
+ gem.summary = %q{Helpers for processing content through a chain of filters}
11
+ gem.homepage = "https://github.com/jch/html-pipeline"
12
+
13
+ gem.files = `git ls-files`.split $/
14
+ gem.test_files = gem.files.grep(%r{^test})
15
+ gem.require_paths = ["lib"]
16
+
17
+ gem.add_dependency 'gemoji', '~> 1.1.1'
18
+ gem.add_dependency 'nokogiri', '~> 1.4'
19
+ gem.add_dependency 'github-markdown', '~> 0.5'
20
+ gem.add_dependency 'sanitize', '~> 2.0'
21
+ gem.add_dependency 'github-linguist', '~> 2.1'
22
+ gem.add_dependency 'rinku', '~> 1.7'
23
+ gem.add_dependency 'escape_utils', '~> 0.2'
24
+ gem.add_dependency 'activesupport', '>= 2'
25
+ end
@@ -0,0 +1,130 @@
1
+ require "nokogiri"
2
+ require "active_support/xml_mini/nokogiri" # convert Documents to hashes
3
+ require "escape_utils"
4
+
5
+ module HTML
6
+ # GitHub HTML processing filters and utilities. This module includes a small
7
+ # framework for defining DOM based content filters and applying them to user
8
+ # provided content.
9
+ #
10
+ # See HTML::Pipeline::Filter for information on building filters.
11
+ #
12
+ # Construct a Pipeline for running multiple HTML filters. A pipeline is created once
13
+ # with one or more filters, and can then be `call`ed many times over the course
14
+ # of its lifetime with input.
15
+ #
16
+ # filters - Array of Filter objects. Each must respond to call(doc,
17
+ # context, result) and return the modified DocumentFragment or a
18
+ # String containing HTML markup. Filters are performed in the
19
+ # order provided.
20
+ # default_context - The default context hash. Values specified here will be merged
21
+ # into values from each individual pipeline run. Can NOT be
22
+ # nil. Default: empty Hash.
23
+ # result_class - The default Class of the result object for individual
24
+ # calls. Default: Hash. Protip: Pass in a Struct to get
25
+ # some semblance of type safety.
26
+ class Pipeline
27
+ autoload :VERSION, 'html/pipeline/version'
28
+ autoload :Pipeline, 'html/pipeline/pipeline'
29
+ autoload :Filter, 'html/pipeline/filter'
30
+ autoload :BodyContent, 'html/pipeline/body_content'
31
+ autoload :AutolinkFilter, 'html/pipeline/autolink_filter'
32
+ autoload :CamoFilter, 'html/pipeline/camo_filter'
33
+ autoload :EmailReplyFilter, 'html/pipeline/email_reply_filter'
34
+ autoload :EmojiFilter, 'html/pipeline/emoji_filter'
35
+ autoload :HttpsFilter, 'html/pipeline/https_filter'
36
+ autoload :ImageMaxWidthFilter, 'html/pipeline/image_max_width_filter'
37
+ autoload :MarkdownFilter, 'html/pipeline/markdown_filter'
38
+ autoload :MentionFilter, 'html/pipeline/@mention_filter'
39
+ autoload :PlainTextInputFilter, 'html/pipeline/plain_text_input_filter'
40
+ autoload :SanitizationFilter, 'html/pipeline/sanitization_filter'
41
+ autoload :SyntaxHighlightFilter, 'html/pipeline/syntax_highlight_filter'
42
+ autoload :TextileFilter, 'html/pipeline/textile_filter'
43
+ autoload :TableOfContentsFilter, 'html/pipeline/toc_filter'
44
+ autoload :TextFilter, 'html/pipeline/text_filter'
45
+
46
+ # Our DOM implementation.
47
+ DocumentFragment = Nokogiri::HTML::DocumentFragment
48
+
49
+ # Parse a String into a DocumentFragment object. When a DocumentFragment is
50
+ # provided, return it verbatim.
51
+ def self.parse(document_or_html)
52
+ document_or_html ||= ''
53
+ if document_or_html.is_a?(String)
54
+ DocumentFragment.parse(document_or_html)
55
+ else
56
+ document_or_html
57
+ end
58
+ end
59
+
60
+ # Public: Returns an Array of Filter objects for this Pipeline.
61
+ attr_reader :filters
62
+
63
+ def initialize(filters, default_context = {}, result_class = nil)
64
+ raise ArgumentError, "default_context cannot be nil" if default_context.nil?
65
+ @filters = filters.flatten.freeze
66
+ @default_context = default_context.freeze
67
+ @result_class = result_class || Hash
68
+ end
69
+
70
+ # Apply all filters in the pipeline to the given HTML.
71
+ #
72
+ # html - A String containing HTML or a DocumentFragment object.
73
+ # context - The context hash passed to each filter. See the Filter docs
74
+ # for more info on possible values. This object MUST NOT be modified
75
+ # in place by filters. Use the Result for passing state back.
76
+ # result - The result Hash passed to each filter for modification. This
77
+ # is where Filters store extracted information from the content.
78
+ #
79
+ # Returns the result Hash after being filtered by this Pipeline. Contains an
80
+ # :output key with the DocumentFragment or String HTML markup based on the
81
+ # output of the last filter in the pipeline.
82
+ def call(html, context = {}, result = nil)
83
+ context = @default_context.merge(context)
84
+ context = context.freeze
85
+ result ||= @result_class.new
86
+ result[:output] = @filters.inject(html) { |doc, filter| filter.call(doc, context, result) }
87
+ result
88
+ end
89
+
90
+ # Like call but guarantee the value returned is a DocumentFragment.
91
+ # Pipelines may return a DocumentFragment or a String. Callers that need a
92
+ # DocumentFragment should use this method.
93
+ def to_document(input, context = {}, result = nil)
94
+ result = call(input, context, result)
95
+ HTML::Pipeline.parse(result[:output])
96
+ end
97
+
98
+ # Like call but guarantee the value returned is a string of HTML markup.
99
+ def to_html(input, context = {}, result = nil)
100
+ result = call(input, context, result = nil)
101
+ output = result[:output]
102
+ if output.respond_to?(:to_html)
103
+ output.to_html
104
+ else
105
+ output.to_s
106
+ end
107
+ end
108
+ end
109
+ end
110
+
111
+ # XXX nokogiri monkey patches
112
+ class Nokogiri::XML::Node
113
+ # Work around an issue with utf-8 encoded data being erroneously converted to
114
+ # something other than utf-8 when replacing text nodes. See 'utf-8 output 2' in
115
+ # user_content_test.rb for details.
116
+ def replace_with_encoding_fix(replacement)
117
+ if replacement.respond_to?(:to_str)
118
+ replacement = document.fragment("<div>#{replacement}</div>").children.first.children
119
+ end
120
+ replace_without_encoding_fix(replacement)
121
+ end
122
+
123
+ alias_method :replace_without_encoding_fix, :replace
124
+ alias_method :replace, :replace_with_encoding_fix
125
+
126
+ def swap(replacement)
127
+ replace(replacement)
128
+ self
129
+ end
130
+ end
@@ -0,0 +1,118 @@
1
+ require 'set'
2
+
3
+ module HTML
4
+ class Pipeline
5
+ # HTML filter that replaces @user mentions with links. Mentions within <pre>,
6
+ # <code>, and <a> elements are ignored. Mentions that reference users that do
7
+ # not exist are ignored.
8
+ #
9
+ # Context options:
10
+ # :base_url - Used to construct links to user profile pages for each
11
+ # mention.
12
+ # :info_url - Used to link to "more info" when someone mentions @mention
13
+ # or @mentioned.
14
+ #
15
+ class MentionFilter < Filter
16
+ # Public: Find user @mentions in text. See
17
+ # MentionFilter#mention_link_filter.
18
+ #
19
+ # MentionFilter.mentioned_logins_in(text) do |match, login, is_mentioned|
20
+ # "<a href=...>#{login}</a>"
21
+ # end
22
+ #
23
+ # text - String text to search.
24
+ #
25
+ # Yields the String match, the String login name, and a Boolean determining
26
+ # if the match = "@mention[ed]". The yield's return replaces the match in
27
+ # the original text.
28
+ #
29
+ # Returns a String replaced with the return of the block.
30
+ def self.mentioned_logins_in(text)
31
+ text.gsub MentionPattern do |match|
32
+ login = $1
33
+ yield match, login, MentionLogins.include?(login.downcase)
34
+ end
35
+ end
36
+
37
+ # Pattern used to extract @mentions from text.
38
+ MentionPattern = /
39
+ (?:^|\W) # beginning of string or non-word char
40
+ @((?>[a-z0-9][a-z0-9-]*)) # @username
41
+ (?!\/) # without a trailing slash
42
+ (?=
43
+ \.+[ \t\W]| # dots followed by space or non-word character
44
+ \.+$| # dots at end of line
45
+ [^0-9a-zA-Z_.]| # non-word character except dot
46
+ $ # end of line
47
+ )
48
+ /ix
49
+
50
+ # List of username logins that, when mentioned, link to the blog post
51
+ # about @mentions instead of triggering a real mention.
52
+ MentionLogins = %w(
53
+ mention
54
+ mentions
55
+ mentioned
56
+ mentioning
57
+ )
58
+
59
+ # Don't look for mentions in text nodes that are children of these elements
60
+ IGNORE_PARENTS = %w(pre code a).to_set
61
+
62
+ def call
63
+ doc.search('text()').each do |node|
64
+ content = node.to_html
65
+ next if !content.include?('@')
66
+ next if has_ancestor?(node, IGNORE_PARENTS)
67
+ html = mention_link_filter(content, base_url, info_url)
68
+ next if html == content
69
+ node.replace(html)
70
+ end
71
+ doc
72
+ end
73
+
74
+ # The URL to provide when someone @mentions a "mention" name, such as
75
+ # @mention or @mentioned, that will give them more info on mentions.
76
+ def info_url
77
+ context[:info_url] || nil
78
+ end
79
+
80
+ # Replace user @mentions in text with links to the mentioned user's
81
+ # profile page.
82
+ #
83
+ # text - String text to replace @mention usernames in.
84
+ # base_url - The base URL used to construct user profile URLs. NOTE(review): not read by this method; link_to_mentioned_user uses the filter's own base_url.
85
+ # info_url - The "more info" URL used to link to more info on @mentions.
86
+ # If nil we don't link @mention or @mentioned.
87
+ #
88
+ # Returns a string with @mentions replaced with links. All links have a
89
+ # 'user-mention' class name attached for styling.
90
+ def mention_link_filter(text, base_url='/', info_url=nil)
91
+ self.class.mentioned_logins_in(text) do |match, login, is_mentioned|
92
+ link =
93
+ if is_mentioned
94
+ link_to_mention_info(login, info_url)
95
+ else
96
+ link_to_mentioned_user(login)
97
+ end
98
+
99
+ link ? match.sub("@#{login}", link) : match
100
+ end
101
+ end
102
+
103
+ def link_to_mention_info(text, info_url=nil)
104
+ return "@#{text}" if info_url.nil?
105
+ "<a href='#{info_url}' class='user-mention'>" +
106
+ "@#{text}" +
107
+ "</a>"
108
+ end
109
+
110
+ def link_to_mentioned_user(login)
111
+ url = File.join(base_url, login)
112
+ "<a href='#{url}' class='user-mention'>" +
113
+ "@#{login}" +
114
+ "</a>"
115
+ end
116
+ end
117
+ end
118
+ end