html-pipeline-no-charlock 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/.gitignore +19 -0
  2. data/.travis.yml +13 -0
  3. data/CHANGELOG.md +16 -0
  4. data/Gemfile +9 -0
  5. data/LICENSE +22 -0
  6. data/README.md +221 -0
  7. data/Rakefile +13 -0
  8. data/html-pipeline-no-charlock.gemspec +25 -0
  9. data/html-pipeline.gemspec +26 -0
  10. data/lib/html/pipeline.rb +130 -0
  11. data/lib/html/pipeline/@mention_filter.rb +118 -0
  12. data/lib/html/pipeline/autolink_filter.rb +22 -0
  13. data/lib/html/pipeline/body_content.rb +42 -0
  14. data/lib/html/pipeline/camo_filter.rb +70 -0
  15. data/lib/html/pipeline/email_reply_filter.rb +56 -0
  16. data/lib/html/pipeline/emoji_filter.rb +54 -0
  17. data/lib/html/pipeline/filter.rb +178 -0
  18. data/lib/html/pipeline/https_filter.rb +13 -0
  19. data/lib/html/pipeline/image_max_width_filter.rb +37 -0
  20. data/lib/html/pipeline/markdown_filter.rb +29 -0
  21. data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
  22. data/lib/html/pipeline/sanitization_filter.rb +105 -0
  23. data/lib/html/pipeline/syntax_highlight_filter.rb +29 -0
  24. data/lib/html/pipeline/text_filter.rb +14 -0
  25. data/lib/html/pipeline/textile_filter.rb +21 -0
  26. data/lib/html/pipeline/toc_filter.rb +28 -0
  27. data/lib/html/pipeline/version.rb +5 -0
  28. data/test/html/pipeline/autolink_filter_test.rb +22 -0
  29. data/test/html/pipeline/camo_filter_test.rb +47 -0
  30. data/test/html/pipeline/emoji_filter_test.rb +18 -0
  31. data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
  32. data/test/html/pipeline/markdown_filter_test.rb +101 -0
  33. data/test/html/pipeline/mention_filter_test.rb +158 -0
  34. data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
  35. data/test/html/pipeline/sanitization_filter_test.rb +47 -0
  36. data/test/html/pipeline/toc_filter_test.rb +47 -0
  37. data/test/test_helper.rb +38 -0
  38. metadata +214 -0
@@ -0,0 +1,118 @@
1
+ require 'set'
2
+
3
+ module HTML
4
+ class Pipeline
5
+ # HTML filter that replaces @user mentions with links. Mentions within <pre>,
6
+ # <code>, and <a> elements are ignored. Mentions that reference users that do
7
+ # not exist are ignored.
8
+ #
9
+ # Context options:
10
+ # :base_url - Used to construct links to user profile pages for each
11
+ # mention.
12
+ # :info_url - Used to link to "more info" when someone mentions @mention
13
+ # or @mentioned.
14
+ #
15
class MentionFilter < Filter
  # Public: Find user @mentions in text. See
  # MentionFilter#mention_link_filter.
  #
  #   MentionFilter.mentioned_logins_in(text) do |match, login, is_mentioned|
  #     "<a href=...>#{login}</a>"
  #   end
  #
  # text - String text to search.
  #
  # Yields the String match, the String login name, and a Boolean determining
  # if the match = "@mention[ed]". The yield's return replaces the match in
  # the original text.
  #
  # Returns a String replaced with the return of the block.
  def self.mentioned_logins_in(text)
    text.gsub MentionPattern do |match|
      login = Regexp.last_match(1)
      yield match, login, MentionLogins.include?(login.downcase)
    end
  end

  # Pattern used to extract @mentions from text.
  MentionPattern = /
    (?:^|\W)                    # beginning of string or non-word char
    @((?>[a-z0-9][a-z0-9-]*))   # @username
    (?!\/)                      # without a trailing slash
    (?=
      \.+[ \t\W]|               # dots followed by space or non-word character
      \.+$|                     # dots at end of line
      [^0-9a-zA-Z_.]|           # non-word character except dot
      $                         # end of line
    )
  /ix

  # List of username logins that, when mentioned, link to the blog post
  # about @mentions instead of triggering a real mention. Frozen so the
  # shared list cannot be mutated at runtime.
  MentionLogins = %w(
    mention
    mentions
    mentioned
    mentioning
  ).freeze

  # Don't look for mentions in text nodes that are children of these elements
  IGNORE_PARENTS = %w(pre code a).to_set.freeze

  # Walks every text node of the document and replaces @mentions with
  # links, skipping text without an "@" and text nested inside any of the
  # IGNORE_PARENTS elements.
  #
  # Returns the (possibly modified) document.
  def call
    doc.search('text()').each do |node|
      content = node.to_html
      next unless content.include?('@')
      next if has_ancestor?(node, IGNORE_PARENTS)
      html = mention_link_filter(content, base_url, info_url)
      # Only touch the tree when a replacement actually happened.
      next if html == content
      node.replace(html)
    end
    doc
  end

  # The URL to provide when someone @mentions a "mention" name, such as
  # @mention or @mentioned, that will give them more info on mentions.
  #
  # Returns the :info_url context value, or nil when it is unset. The
  # `|| nil` also normalizes an explicit `false` to nil, which
  # link_to_mention_info relies on for its `nil?` check.
  def info_url
    context[:info_url] || nil
  end

  # Replace user @mentions in text with links to the mentioned user's
  # profile page.
  #
  # text     - String text to replace @mention usernames in.
  # base_url - The base URL used to construct user profile URLs.
  #            NOTE: this argument is currently not forwarded; profile
  #            links are built by link_to_mentioned_user from the filter's
  #            own base_url. It is kept for interface compatibility.
  # info_url - The "more info" URL used to link to more info on @mentions.
  #            If nil we don't link @mention or @mentioned.
  #
  # Returns a string with @mentions replaced with links. All links have a
  # 'user-mention' class name attached for styling.
  def mention_link_filter(text, base_url='/', info_url=nil)
    self.class.mentioned_logins_in(text) do |match, login, is_mentioned|
      link =
        if is_mentioned
          link_to_mention_info(login, info_url)
        else
          link_to_mentioned_user(login)
        end

      link ? match.sub("@#{login}", link) : match
    end
  end

  # Builds the replacement for a "meta" mention such as @mention.
  #
  # text     - The String login that was mentioned (without the "@").
  # info_url - The "more info" URL, or nil to leave the mention unlinked.
  #
  # Returns the plain "@text" String when info_url is nil, otherwise an
  # anchor tag pointing at info_url.
  def link_to_mention_info(text, info_url=nil)
    return "@#{text}" if info_url.nil?
    "<a href='#{info_url}' class='user-mention'>@#{text}</a>"
  end

  # Builds the anchor tag linking a mentioned login to its profile page.
  #
  # login - The String login that was mentioned (without the "@").
  #
  # Returns a String anchor tag whose href joins base_url and the login.
  def link_to_mentioned_user(login)
    url = File.join(base_url, login)
    "<a href='#{url}' class='user-mention'>@#{login}</a>"
  end
end
117
+ end
118
+ end
@@ -0,0 +1,22 @@
1
+ require 'rinku'
2
+
3
+ module HTML
4
+ class Pipeline
5
+ # HTML Filter for auto_linking urls in HTML.
6
+ #
7
+ # Context options:
8
+ # :autolink - boolean whether to autolink urls
9
+ # :flags - additional Rinku flags. See https://github.com/vmg/rinku
10
+ #
11
+ # This filter does not write additional information to the context.
12
class AutolinkFilter < Filter
  # Auto-links bare URLs in the HTML via Rinku unless the :autolink
  # context option is explicitly false. Any :flags from the context are
  # OR-ed into the Rinku flags.
  #
  # Returns the HTML String (linked, or untouched when disabled).
  def call
    if context[:autolink] == false
      html
    else
      extra_flags = context[:flags]
      rinku_flags = extra_flags ? (0 | extra_flags) : 0

      Rinku.auto_link(html, :urls, nil, %w[a script kbd pre code], rinku_flags)
    end
  end
end
21
+ end
22
+ end
@@ -0,0 +1,42 @@
1
+ module HTML
2
+ class Pipeline
3
+ # Public: Runs a String of content through an HTML processing pipeline,
4
+ # providing easy access to a generated DocumentFragment.
5
class BodyContent
  # Public: Initialize a BodyContent.
  #
  # body     - A String body.
  # context  - A Hash of context options for the filters.
  # pipeline - A HTML::Pipeline object with one or more Filters.
  def initialize(body, context, pipeline)
    @body = body
    @context = context
    @pipeline = pipeline
  end

  # Public: Gets the memoized result of the body content as it passed through
  # the Pipeline. The pipeline is invoked on first access only.
  #
  # (No attr_reader is declared for :result: this method is the reader, and
  # declaring both only produced a "method redefined" warning.)
  #
  # Returns a Hash, or something similar as defined by @pipeline.result_class.
  def result
    @result ||= @pipeline.call @body, @context
  end

  # Public: Gets the updated body from the Pipeline result.
  #
  # Returns a String or DocumentFragment.
  def output
    @output ||= result[:output]
  end

  # Public: Parses the output into a DocumentFragment.
  #
  # Returns a DocumentFragment.
  def document
    @document ||= HTML::Pipeline.parse output
  end
end
41
+ end
42
+ end
@@ -0,0 +1,70 @@
1
+ require 'openssl'
2
+
3
+ module HTML
4
+ class Pipeline
5
+ # HTML Filter for replacing http image URLs with camo versions. See:
6
+ #
7
+ # https://github.com/atmos/camo
8
+ #
9
+ # All images provided in user content should be run through this
10
+ # filter so that http image sources do not cause mixed-content warnings
11
+ # in browser clients.
12
+ #
13
+ # Context options:
14
+ # :asset_proxy (required) - Base URL for constructed asset proxy URLs.
15
+ # :asset_proxy_secret_key (required) - The shared secret used to encode URLs.
16
+ #
17
+ # This filter does not write additional information to the context.
18
class CamoFilter < Filter
  # Hijacks images in the markup provided, replacing them with URLs that
  # go through the github asset proxy.
  #
  # Images without a src attribute are skipped. When the
  # :disable_asset_proxy context option is set, the document is returned
  # untouched.
  #
  # Returns the (possibly modified) document.
  def call
    # The option does not vary per image, so check it once up front.
    return doc if context[:disable_asset_proxy]

    doc.search("img").each do |element|
      raw_src = element['src']
      next if raw_src.nil?

      src = raw_src.strip
      # Dots are escaped so that only the literal github.com host is upgraded.
      src = src.sub(%r!^http://github\.com!, 'https://github.com')

      if src =~ /^http:/ || src =~ %r!^https://img\.skitch\.com/!
        element['src'] = asset_proxy_url(src)
      else
        element['src'] = src
      end
    end
    doc
  end

  # Implementation of validate hook.
  # Errors should raise exceptions or use an existing validator.
  def validate
    needs :asset_proxy, :asset_proxy_secret_key
  end

  # The camouflaged URL for a given image URL.
  #
  # url - The String image source URL.
  #
  # Returns a String of the form "<proxy host>/<hmac>/<hex encoded url>".
  def asset_proxy_url(url)
    "#{asset_proxy_host}/#{asset_url_hash(url)}/#{hexencode(url)}"
  end

  # Private: calculate the HMAC digest for a image source URL.
  #
  # Uses OpenSSL::Digest directly; the nested OpenSSL::Digest::Digest
  # constant is deprecated.
  #
  # Returns the hex encoded SHA1 HMAC String of url, keyed with the
  # :asset_proxy_secret_key context option.
  def asset_url_hash(url)
    digest = OpenSSL::Digest.new('sha1')
    OpenSSL::HMAC.hexdigest(digest, asset_proxy_secret_key, url)
  end

  # Private: the hostname to use for generated asset proxied URLs.
  def asset_proxy_host
    context[:asset_proxy]
  end

  # Private: the shared secret used to sign proxied URLs.
  def asset_proxy_secret_key
    context[:asset_proxy_secret_key]
  end

  # Private: helper to hexencode a string. Each byte ends up encoded into
  # two characters, zero padded value in the range [0-9a-f].
  def hexencode(str)
    str.unpack('H*').first
  end
end
69
+ end
70
+ end
@@ -0,0 +1,56 @@
1
+ module HTML
2
+ class Pipeline
3
+ # HTML Filter that converts email reply text into an HTML DocumentFragment.
4
+ # It must be used as the first filter in a pipeline.
5
+ #
6
+ # Context options:
7
+ # None
8
+ #
9
+ # This filter does not write any additional information to the context hash.
10
class EmailReplyFilter < TextFilter
  include EscapeUtils

  EMAIL_HIDDEN_HEADER    = %(<span class="email-hidden-toggle"><a href="#">&hellip;</a></span><div class="email-hidden-reply" style="display:none">).freeze
  EMAIL_QUOTED_HEADER    = %(<div class="email-quoted-reply">).freeze
  EMAIL_SIGNATURE_HEADER = %(<div class="email-signature-reply">).freeze
  EMAIL_FRAGMENT_HEADER  = %(<div class="email-fragment">).freeze
  EMAIL_HEADER_END       = "</div>".freeze

  # Splits the email body into fragments with EmailReplyParser and wraps
  # each one in a <div> whose class reflects whether it is quoted, a
  # signature, or a regular fragment. Leading ">" quote markers are
  # stripped from each line after HTML escaping. The first hidden fragment
  # opens a single "email-hidden-reply" wrapper that is closed after the
  # last fragment, so every following fragment ends up inside it.
  #
  # Returns the email comment HTML as a String
  def call
    hidden_opened = false
    fragments = EmailReplyParser.read(text.dup).fragments

    blocks = fragments.map do |fragment|
      body = escape_html(fragment.to_s.strip).gsub(/^\s*(>|&gt;)/, '')

      header =
        if fragment.quoted?
          EMAIL_QUOTED_HEADER
        elsif fragment.signature?
          EMAIL_SIGNATURE_HEADER
        else
          EMAIL_FRAGMENT_HEADER
        end
      block = "#{header}#{body}#{EMAIL_HEADER_END}"

      # Only the first hidden fragment opens the hidden wrapper.
      if fragment.hidden? && !hidden_opened
        hidden_opened = true
        block = "#{EMAIL_HIDDEN_HEADER}#{block}"
      end

      block
    end

    blocks << EMAIL_HEADER_END if hidden_opened
    blocks.join("\n")
  end
end
55
+ end
56
+ end
@@ -0,0 +1,54 @@
1
+ require 'emoji'
2
+
3
+ module HTML
4
+ class Pipeline
5
+ # HTML filter that replaces :emoji: with images.
6
+ #
7
+ # Context:
8
+ # :asset_root (required) - base url to link to emoji sprite
9
class EmojiFilter < Filter
  # Build a regexp that matches all valid :emoji: names.
  EmojiPattern = /:(#{Emoji.names.map { |name| Regexp.escape(name) }.join('|')}):/

  # Don't look for emoji in text nodes that are children of these elements.
  # Defined once instead of allocating a new array for every text node.
  IGNORE_PARENTS = %w(pre code).freeze

  # Walks every text node of the document and replaces :emoji: names with
  # images, skipping text without a ":" and text nested inside any of the
  # IGNORE_PARENTS elements.
  #
  # Returns the (possibly modified) document.
  def call
    doc.search('text()').each do |node|
      content = node.to_html
      next unless content.include?(':')
      next if has_ancestor?(node, IGNORE_PARENTS)
      html = emoji_image_filter(content)
      # Only touch the tree when a replacement actually happened.
      next if html == content
      node.replace(html)
    end
    doc
  end

  # Implementation of validate hook.
  # Errors should raise exceptions or use an existing validator.
  def validate
    needs :asset_root
  end

  # Replace :emoji: with corresponding images.
  #
  # text - String text to replace :emoji: in.
  #
  # Returns a String with :emoji: replaced with images.
  def emoji_image_filter(text)
    return text unless text.include?(':')

    text.gsub EmojiPattern do |match|
      name = Regexp.last_match(1)
      "<img class='emoji' title=':#{name}:' alt=':#{name}:' src='#{File.join(asset_root, "emoji", "#{name}.png")}' height='20' width='20' align='absmiddle' />"
    end
  end

  # The base url to link emoji sprites
  #
  # This reader does not raise itself; the presence of :asset_root is
  # checked by #validate.
  #
  # Returns the context's asset_root.
  def asset_root
    context[:asset_root]
  end
end
53
+ end
54
+ end
@@ -0,0 +1,178 @@
1
+ module HTML
2
+ class Pipeline
3
+ # Base class for user content HTML filters. Each filter takes an
4
+ # HTML string or Nokogiri::HTML::DocumentFragment, performs
5
+ # modifications and/or writes information to the result hash. Filters must
6
+ # return a DocumentFragment (typically the same instance provided to the call
7
+ # method) or a String with HTML markup.
8
+ #
9
+ # Example filter that replaces all images with trollface:
10
+ #
11
+ # class FuuuFilter < HTML::Pipeline::Filter
12
+ # def call
13
+ # doc.search('img').each do |img|
14
+ # img['src'] = "http://paradoxdgn.com/junk/avatars/trollface.jpg"
15
+ # end
16
+ # end
17
+ # end
18
+ #
19
+ # The context Hash passes options to filters and should not be changed in
20
+ # place. A Result Hash allows filters to make extracted information
21
+ # available to the caller and is mutable.
22
+ #
23
+ # Common context options:
24
+ # :base_url - The site's base URL
25
+ # :repository - A Repository providing context for the HTML being processed
26
+ #
27
+ # Each filter may define additional options and output values. See the class
28
+ # docs for more info.
29
class Filter
  class InvalidDocumentException < StandardError; end

  # doc     - The HTML String or DocumentFragment to filter.
  # context - Optional Hash of options for the filter (default: {}).
  # result  - Optional Hash the filter may write extracted data to
  #           (default: {}).
  #
  # Runs the #validate hook, so subclasses may raise here when required
  # context is missing.
  def initialize(doc, context = nil, result = nil)
    if doc.kind_of?(String)
      @html = doc.to_str
      @doc = nil
    else
      @doc = doc
      @html = nil
    end
    @context = context || {}
    @result = result || {}
    validate
  end

  # Public: Returns a simple Hash used to pass extra information into filters
  # and also to allow filters to make extracted information available to the
  # caller.
  attr_reader :context

  # Public: Returns a Hash used to allow filters to pass back information
  # to callers of the various Pipelines. This can be used for
  # #mentioned_users, for example.
  attr_reader :result

  # The Nokogiri::HTML::DocumentFragment to be manipulated. If the filter was
  # provided a String, parse into a DocumentFragment the first time this
  # method is called.
  def doc
    @doc ||= parse_html(html)
  end

  # The String representation of the document. If a DocumentFragment was
  # provided to the Filter, it is serialized into a String when this method is
  # called.
  #
  # Raises InvalidDocumentException when the filter was given neither a
  # String nor a document.
  def html
    raise InvalidDocumentException if @html.nil? && @doc.nil?
    @html || doc.to_html
  end

  # The main filter entry point. The doc attribute is guaranteed to be a
  # Nokogiri::HTML::DocumentFragment when invoked. Subclasses should modify
  # this document in place or extract information and add it to the context
  # hash.
  def call
    raise NotImplementedError
  end

  # Make sure the context has everything we need. Noop: Subclasses can override.
  def validate
  end

  # The Repository object provided in the context hash, or nil when no
  # :repository was specified.
  #
  # It's assumed that the repository context has already been checked
  # for permissions
  def repository
    context[:repository]
  end

  # The User object provided in the context hash, or nil when no user
  # was specified
  def current_user
    context[:current_user]
  end

  # Return whether the filter can access a given repo while
  # applying a filter
  #
  # A repo can only be accessed if it's pullable by the user who
  # submitted the content of this filter, or if it's the same as
  # the repository context in which the filter runs
  def can_access_repo?(repo)
    return false if repo.nil?
    return true if repo == repository
    repo.pullable_by?(current_user)
  end

  # The site's base URL provided in the context hash, or '/' when no
  # base URL was specified.
  def base_url
    context[:base_url] || '/'
  end

  # Ensure the passed argument is a DocumentFragment. When a string is
  # provided, it is parsed and returned; otherwise, the DocumentFragment is
  # returned unmodified.
  def parse_html(html)
    HTML::Pipeline.parse(html)
  end

  # Helper method for filter subclasses used to determine if any of a node's
  # ancestors have one of the tag names specified.
  #
  # node - The Node object to check.
  # tags - An array of tag name strings to check. These should be downcase.
  #
  # Returns true when the node has a matching ancestor, false otherwise.
  def has_ancestor?(node, tags)
    # Climb towards the root; the walk stops when a node has no parent.
    while (node = node.parent)
      return true if tags.include?(node.name.downcase)
    end
    false
  end

  # Perform a filter on doc with the given context.
  #
  # Returns a HTML::Pipeline::DocumentFragment or a String containing HTML
  # markup.
  def self.call(doc, context = nil, result = nil)
    new(doc, context, result).call
  end

  # Like call but guarantees that a DocumentFragment is returned, even when
  # the last filter returns a String.
  #
  # result - Optional Hash forwarded to the filter so callers can read
  #          back what it extracted.
  def self.to_document(input, context = nil, result = nil)
    html = call(input, context, result)
    HTML::Pipeline::parse(html)
  end

  # Like call but guarantees that a string of HTML markup is returned.
  #
  # result - Optional Hash forwarded to the filter so callers can read
  #          back what it extracted.
  def self.to_html(input, context = nil, result = nil)
    output = call(input, context, result)
    if output.respond_to?(:to_html)
      output.to_html
    else
      output.to_s
    end
  end

  # Validator for required context. This will check that every key passed
  # in exists in the context hash.
  #
  # If any keys are missing an ArgumentError will be raised with a
  # message naming this filter class and listing the missing keys.
  def needs(*keys)
    missing = keys.reject { |key| context.include? key }

    if missing.any?
      raise ArgumentError,
        "Missing context keys for #{self.class.name}: #{missing.map(&:inspect).join ', '}"
    end
  end
end
177
+ end
178
+ end