html-pipeline 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. data/.gitignore +19 -0
  2. data/.travis.yml +13 -0
  3. data/Gemfile +9 -0
  4. data/LICENSE +22 -0
  5. data/README.md +128 -0
  6. data/Rakefile +11 -0
  7. data/html-pipeline.gemspec +25 -0
  8. data/lib/html/pipeline.rb +130 -0
  9. data/lib/html/pipeline/@mention_filter.rb +118 -0
  10. data/lib/html/pipeline/autolink_filter.rb +22 -0
  11. data/lib/html/pipeline/body_content.rb +42 -0
  12. data/lib/html/pipeline/camo_filter.rb +64 -0
  13. data/lib/html/pipeline/email_reply_filter.rb +56 -0
  14. data/lib/html/pipeline/emoji_filter.rb +48 -0
  15. data/lib/html/pipeline/filter.rb +158 -0
  16. data/lib/html/pipeline/https_filter.rb +13 -0
  17. data/lib/html/pipeline/image_max_width_filter.rb +37 -0
  18. data/lib/html/pipeline/markdown_filter.rb +29 -0
  19. data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
  20. data/lib/html/pipeline/sanitization_filter.rb +107 -0
  21. data/lib/html/pipeline/syntax_highlight_filter.rb +29 -0
  22. data/lib/html/pipeline/text_filter.rb +14 -0
  23. data/lib/html/pipeline/textile_filter.rb +21 -0
  24. data/lib/html/pipeline/toc_filter.rb +28 -0
  25. data/lib/html/pipeline/version.rb +5 -0
  26. data/test/html/pipeline/autolink_filter_test.rb +22 -0
  27. data/test/html/pipeline/camo_filter_test.rb +39 -0
  28. data/test/html/pipeline/emoji_filter_test.rb +16 -0
  29. data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
  30. data/test/html/pipeline/markdown_filter_test.rb +101 -0
  31. data/test/html/pipeline/mention_filter_test.rb +158 -0
  32. data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
  33. data/test/html/pipeline/sanitization_filter_test.rb +47 -0
  34. data/test/html/pipeline/toc_filter_test.rb +47 -0
  35. data/test/test_helper.rb +38 -0
  36. metadata +221 -0
data/.gitignore ADDED
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ bin/*
19
+ vendor/gems
data/.travis.yml ADDED
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+
3
+ before_install:
4
+ - sudo apt-get update -qq
5
+ - sudo apt-get install -qq libicu-dev
6
+
7
+ script: "bundle exec rake"
8
+
9
+ rvm:
10
+ - 1.8.7
11
+ - 1.9.2
12
+ - 1.9.3
13
+ - ree
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in html-pipeline.gemspec
4
+ gemspec
5
+
6
+ group :development do
7
+ gem 'bundler'
8
+ gem 'rake'
9
+ end
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 GitHub Inc. and Jerry Cheung
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,128 @@
1
+ # HTML::Pipeline [![Build Status](https://secure.travis-ci.org/jch/html-pipeline.png)](http://travis-ci.org/jch/html-pipeline)
2
+
3
+ GitHub HTML processing filters and utilities. This module includes a small
4
+ framework for defining DOM based content filters and applying them to user
5
+ provided content.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'html-pipeline'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ ```sh
18
+ $ bundle
19
+ ```
20
+
21
+ Or install it yourself as:
22
+
23
+ ```sh
24
+ $ gem install html-pipeline
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ This library provides a handful of chainable HTML filters to transform user
30
+ content into markup. A filter takes an HTML string or
31
+ `Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then
32
+ outputs the result.
33
+
34
+ For example, to transform Markdown source into Markdown HTML:
35
+
36
+ ```ruby
37
+ require 'html/pipeline'
38
+
39
+ filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!")
40
+ filter.call
41
+ ```
42
+
43
+ Filters can be combined into a pipeline which causes each filter to hand its
44
+ output to the next filter's input. So if you wanted to have content be
45
+ filtered through Markdown and be syntax highlighted, you can create the
46
+ following pipeline:
47
+
48
+ ```ruby
49
+ pipeline = HTML::Pipeline.new [
50
+ HTML::Pipeline::MarkdownFilter,
51
+ HTML::Pipeline::SyntaxHighlightFilter
52
+ ]
53
+ result = pipeline.call <<CODE
54
+ This is *great*:
55
+
56
+ some_code(:first)
57
+
58
+ CODE
59
+ result[:output].to_s
60
+ ```
61
+
62
+ Prints:
63
+
64
+ ```html
65
+ <p>This is <em>great</em>:</p>
66
+
67
+ <div class="highlight">
68
+ <pre><span class="n">some_code</span><span class="p">(</span><span class="ss">:first</span><span class="p">)</span>
69
+ </pre>
70
+ </div>
71
+ ```
72
+
73
+ Some filters take an optional **context** and/or **result** hash. These are
74
+ used to pass around arguments and metadata between filters in a pipeline. For
75
+ example, if you don't want to use GitHub formatted Markdown, you can
76
+ pass an option in the context hash:
77
+
78
+ ```ruby
79
+ filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false)
80
+ filter.call
81
+ ```
82
+
83
+ ## Filters
84
+
85
+ * `MentionFilter` - replace `@user` mentions with links
86
+ * `AutolinkFilter` - auto_linking urls in HTML
87
+ * `CamoFilter` - replace http image urls with [camo-fied](https://github.com/github/camo) https versions
88
+ * `EmailReplyFilter` - util filter for working with emails
89
+ * `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
90
+ * `ImageMaxWidthFilter` - link to full size image for large images
91
+ * `MarkdownFilter` - convert markdown to html
92
+ * `PlainTextInputFilter` - html escape text and wrap the result in a div
93
+ * `SanitizationFilter` - whitelist sanitize user markup
94
+ * `SyntaxHighlightFilter` - code syntax highlighter with [linguist](https://github.com/github/linguist)
95
+ * `TextileFilter` - convert textile to html
96
+ * `TableOfContentsFilter` - anchor headings with name attributes
97
+
98
+ ## Development Setup
99
+
100
+ ```sh
101
+ bundle
102
+ rake test
103
+ ```
104
+
105
+ ## Contributing
106
+
107
+ 1. Fork it
108
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
109
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
110
+ 4. Push to the branch (`git push origin my-new-feature`)
111
+ 5. Create new Pull Request
112
+
113
+
114
+ ## TODO
115
+
116
+ * test whether emoji filter works on heroku
117
+ * test whether nokogiri monkey patch is still necessary
118
+
119
+ ## Contributors
120
+
121
+ * [Aman Gupta](mailto:aman@tmm1.net)
122
+ * [Jake Boxer](mailto:jake@github.com)
123
+ * [Joshua Peek](mailto:josh@joshpeek.com)
124
+ * [Kyle Neath](mailto:kneath@gmail.com)
125
+ * [Rob Sanheim](mailto:rsanheim@gmail.com)
126
+ * [Simon Rozet](mailto:simon@rozet.name)
127
+ * [Vicent Martí](mailto:tanoku@gmail.com)
128
+ * [Risk :danger: Olson](mailto:technoweenie@gmail.com)
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << "test"
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ t.verbose = true
9
+ end
10
+
11
+ task :default => :test
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/html/pipeline/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.name = "html-pipeline"
6
+ gem.version = HTML::Pipeline::VERSION
7
+ gem.authors = ["Ryan Tomayko", "Jerry Cheung"]
8
+ gem.email = ["ryan@github.com", "jerry@github.com"]
9
+ gem.description = %q{GitHub HTML processing filters and utilities}
10
+ gem.summary = %q{Helpers for processing content through a chain of filters}
11
+ gem.homepage = "https://github.com/jch/html-pipeline"
12
+
13
+ gem.files = `git ls-files`.split $/
14
+ gem.test_files = gem.files.grep(%r{^test})
15
+ gem.require_paths = ["lib"]
16
+
17
+ gem.add_dependency 'gemoji', '~> 1.1.1'
18
+ gem.add_dependency 'nokogiri', '~> 1.4'
19
+ gem.add_dependency 'github-markdown', '~> 0.5'
20
+ gem.add_dependency 'sanitize', '~> 2.0'
21
+ gem.add_dependency 'github-linguist', '~> 2.1'
22
+ gem.add_dependency 'rinku', '~> 1.7'
23
+ gem.add_dependency 'escape_utils', '~> 0.2'
24
+ gem.add_dependency 'activesupport', '>= 2'
25
+ end
@@ -0,0 +1,130 @@
1
+ require "nokogiri"
2
+ require "active_support/xml_mini/nokogiri" # convert Documents to hashes
3
+ require "escape_utils"
4
+
5
+ module HTML
6
+ # GitHub HTML processing filters and utilities. This module includes a small
7
+ # framework for defining DOM based content filters and applying them to user
8
+ # provided content.
9
+ #
10
+ # See HTML::Pipeline::Filter for information on building filters.
11
+ #
12
+ # Construct a Pipeline for running multiple HTML filters. A pipeline is created once
13
+ # with one to many filters, and can then be `call`ed many times over the course
14
+ # of its lifetime with input.
15
+ #
16
+ # filters - Array of Filter objects. Each must respond to call(doc,
17
+ # context) and return the modified DocumentFragment or a
18
+ # String containing HTML markup. Filters are performed in the
19
+ # order provided.
20
+ # default_context - The default context hash. Values specified here will be merged
21
+ # into values from each individual pipeline run. Can NOT be
22
+ # nil. Default: empty Hash.
23
+ # result_class - The default Class of the result object for individual
24
+ # calls. Default: Hash. Protip: Pass in a Struct to get
25
+ # some semblance of type safety.
26
+ class Pipeline
27
+ autoload :VERSION, 'html/pipeline/version'
28
+ autoload :Pipeline, 'html/pipeline/pipeline'
29
+ autoload :Filter, 'html/pipeline/filter'
30
+ autoload :BodyContent, 'html/pipeline/body_content'
31
+ autoload :AutolinkFilter, 'html/pipeline/autolink_filter'
32
+ autoload :CamoFilter, 'html/pipeline/camo_filter'
33
+ autoload :EmailReplyFilter, 'html/pipeline/email_reply_filter'
34
+ autoload :EmojiFilter, 'html/pipeline/emoji_filter'
35
+ autoload :HttpsFilter, 'html/pipeline/https_filter'
36
+ autoload :ImageMaxWidthFilter, 'html/pipeline/image_max_width_filter'
37
+ autoload :MarkdownFilter, 'html/pipeline/markdown_filter'
38
+ autoload :MentionFilter, 'html/pipeline/@mention_filter'
39
+ autoload :PlainTextInputFilter, 'html/pipeline/plain_text_input_filter'
40
+ autoload :SanitizationFilter, 'html/pipeline/sanitization_filter'
41
+ autoload :SyntaxHighlightFilter, 'html/pipeline/syntax_highlight_filter'
42
+ autoload :TextileFilter, 'html/pipeline/textile_filter'
43
+ autoload :TableOfContentsFilter, 'html/pipeline/toc_filter'
44
+ autoload :TextFilter, 'html/pipeline/text_filter'
45
+
46
+ # Our DOM implementation.
47
+ DocumentFragment = Nokogiri::HTML::DocumentFragment
48
+
49
+ # Parse a String into a DocumentFragment object. When a DocumentFragment is
50
+ # provided, return it verbatim.
51
+ def self.parse(document_or_html)
52
+ document_or_html ||= ''
53
+ if document_or_html.is_a?(String)
54
+ DocumentFragment.parse(document_or_html)
55
+ else
56
+ document_or_html
57
+ end
58
+ end
59
+
60
+ # Public: Returns an Array of Filter objects for this Pipeline.
61
+ attr_reader :filters
62
+
63
+ def initialize(filters, default_context = {}, result_class = nil)
64
+ raise ArgumentError, "default_context cannot be nil" if default_context.nil?
65
+ @filters = filters.flatten.freeze
66
+ @default_context = default_context.freeze
67
+ @result_class = result_class || Hash
68
+ end
69
+
70
+ # Apply all filters in the pipeline to the given HTML.
71
+ #
72
+ # html - A String containing HTML or a DocumentFragment object.
73
+ # context - The context hash passed to each filter. See the Filter docs
74
+ # for more info on possible values. This object MUST NOT be modified
75
+ # in place by filters. Use the Result for passing state back.
76
+ # result - The result Hash passed to each filter for modification. This
77
+ # is where Filters store extracted information from the content.
78
+ #
79
+ # Returns the result Hash after being filtered by this Pipeline. Contains an
80
+ # :output key with the DocumentFragment or String HTML markup based on the
81
+ # output of the last filter in the pipeline.
82
+ def call(html, context = {}, result = nil)
83
+ context = @default_context.merge(context)
84
+ context = context.freeze
85
+ result ||= @result_class.new
86
+ result[:output] = @filters.inject(html) { |doc, filter| filter.call(doc, context, result) }
87
+ result
88
+ end
89
+
90
+ # Like call but guarantee the value returned is a DocumentFragment.
91
+ # Pipelines may return a DocumentFragment or a String. Callers that need a
92
+ # DocumentFragment should use this method.
93
+ def to_document(input, context = {}, result = nil)
94
+ result = call(input, context, result)
95
+ HTML::Pipeline.parse(result[:output])
96
+ end
97
+
98
+ # Like call but guarantee the value returned is a string of HTML markup.
99
+ def to_html(input, context = {}, result = nil)
100
+ result = call(input, context, result = nil)
101
+ output = result[:output]
102
+ if output.respond_to?(:to_html)
103
+ output.to_html
104
+ else
105
+ output.to_s
106
+ end
107
+ end
108
+ end
109
+ end
110
+
111
+ # XXX nokogiri monkey patches
112
+ class Nokogiri::XML::Node
113
+ # Work around an issue with utf-8 encoded data being erroneously converted to
114
+ # ... some other shit when replacing text nodes. See 'utf-8 output 2' in
115
+ # user_content_test.rb for details.
116
+ def replace_with_encoding_fix(replacement)
117
+ if replacement.respond_to?(:to_str)
118
+ replacement = document.fragment("<div>#{replacement}</div>").children.first.children
119
+ end
120
+ replace_without_encoding_fix(replacement)
121
+ end
122
+
123
+ alias_method :replace_without_encoding_fix, :replace
124
+ alias_method :replace, :replace_with_encoding_fix
125
+
126
+ def swap(replacement)
127
+ replace(replacement)
128
+ self
129
+ end
130
+ end
@@ -0,0 +1,118 @@
1
+ require 'set'
2
+
3
+ module HTML
4
+ class Pipeline
5
+ # HTML filter that replaces @user mentions with links. Mentions within <pre>,
6
+ # <code>, and <a> elements are ignored. Mentions that reference users that do
7
+ # not exist are ignored.
8
+ #
9
+ # Context options:
10
+ # :base_url - Used to construct links to user profile pages for each
11
+ # mention.
12
+ # :info_url - Used to link to "more info" when someone mentions @mention
13
+ # or @mentioned.
14
+ #
15
+ class MentionFilter < Filter
16
+ # Public: Find user @mentions in text. See
17
+ # MentionFilter#mention_link_filter.
18
+ #
19
+ # MentionFilter.mentioned_logins_in(text) do |match, login, is_mentioned|
20
+ # "<a href=...>#{login}</a>"
21
+ # end
22
+ #
23
+ # text - String text to search.
24
+ #
25
+ # Yields the String match, the String login name, and a Boolean determining
26
+ # if the match = "@mention[ed]". The yield's return replaces the match in
27
+ # the original text.
28
+ #
29
+ # Returns a String replaced with the return of the block.
30
+ def self.mentioned_logins_in(text)
31
+ text.gsub MentionPattern do |match|
32
+ login = $1
33
+ yield match, login, MentionLogins.include?(login.downcase)
34
+ end
35
+ end
36
+
37
+ # Pattern used to extract @mentions from text.
38
+ MentionPattern = /
39
+ (?:^|\W) # beginning of string or non-word char
40
+ @((?>[a-z0-9][a-z0-9-]*)) # @username
41
+ (?!\/) # without a trailing slash
42
+ (?=
43
+ \.+[ \t\W]| # dots followed by space or non-word character
44
+ \.+$| # dots at end of line
45
+ [^0-9a-zA-Z_.]| # non-word character except dot
46
+ $ # end of line
47
+ )
48
+ /ix
49
+
50
+ # List of username logins that, when mentioned, link to the blog post
51
+ # about @mentions instead of triggering a real mention.
52
+ MentionLogins = %w(
53
+ mention
54
+ mentions
55
+ mentioned
56
+ mentioning
57
+ )
58
+
59
+ # Don't look for mentions in text nodes that are children of these elements
60
+ IGNORE_PARENTS = %w(pre code a).to_set
61
+
62
+ def call
63
+ doc.search('text()').each do |node|
64
+ content = node.to_html
65
+ next if !content.include?('@')
66
+ next if has_ancestor?(node, IGNORE_PARENTS)
67
+ html = mention_link_filter(content, base_url, info_url)
68
+ next if html == content
69
+ node.replace(html)
70
+ end
71
+ doc
72
+ end
73
+
74
+ # The URL to provide when someone @mentions a "mention" name, such as
75
+ # @mention or @mentioned, that will give them more info on mentions.
76
+ def info_url
77
+ context[:info_url] || nil
78
+ end
79
+
80
+ # Replace user @mentions in text with links to the mentioned user's
81
+ # profile page.
82
+ #
83
+ # text - String text to replace @mention usernames in.
84
+ # base_url - The base URL used to construct user profile URLs.
85
+ # info_url - The "more info" URL used to link to more info on @mentions.
86
+ # If nil we don't link @mention or @mentioned.
87
+ #
88
+ # Returns a string with @mentions replaced with links. All links have a
89
+ # 'user-mention' class name attached for styling.
90
+ def mention_link_filter(text, base_url='/', info_url=nil)
91
+ self.class.mentioned_logins_in(text) do |match, login, is_mentioned|
92
+ link =
93
+ if is_mentioned
94
+ link_to_mention_info(login, info_url)
95
+ else
96
+ link_to_mentioned_user(login)
97
+ end
98
+
99
+ link ? match.sub("@#{login}", link) : match
100
+ end
101
+ end
102
+
103
+ def link_to_mention_info(text, info_url=nil)
104
+ return "@#{text}" if info_url.nil?
105
+ "<a href='#{info_url}' class='user-mention'>" +
106
+ "@#{text}" +
107
+ "</a>"
108
+ end
109
+
110
+ def link_to_mentioned_user(login)
111
+ url = File.join(base_url, login)
112
+ "<a href='#{url}' class='user-mention'>" +
113
+ "@#{login}" +
114
+ "</a>"
115
+ end
116
+ end
117
+ end
118
+ end