html-pipeline-no-charlock 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/.gitignore +19 -0
  2. data/.travis.yml +13 -0
  3. data/CHANGELOG.md +16 -0
  4. data/Gemfile +9 -0
  5. data/LICENSE +22 -0
  6. data/README.md +221 -0
  7. data/Rakefile +13 -0
  8. data/html-pipeline-no-charlock.gemspec +25 -0
  9. data/html-pipeline.gemspec +26 -0
  10. data/lib/html/pipeline.rb +130 -0
  11. data/lib/html/pipeline/@mention_filter.rb +118 -0
  12. data/lib/html/pipeline/autolink_filter.rb +22 -0
  13. data/lib/html/pipeline/body_content.rb +42 -0
  14. data/lib/html/pipeline/camo_filter.rb +70 -0
  15. data/lib/html/pipeline/email_reply_filter.rb +56 -0
  16. data/lib/html/pipeline/emoji_filter.rb +54 -0
  17. data/lib/html/pipeline/filter.rb +178 -0
  18. data/lib/html/pipeline/https_filter.rb +13 -0
  19. data/lib/html/pipeline/image_max_width_filter.rb +37 -0
  20. data/lib/html/pipeline/markdown_filter.rb +29 -0
  21. data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
  22. data/lib/html/pipeline/sanitization_filter.rb +105 -0
  23. data/lib/html/pipeline/syntax_highlight_filter.rb +29 -0
  24. data/lib/html/pipeline/text_filter.rb +14 -0
  25. data/lib/html/pipeline/textile_filter.rb +21 -0
  26. data/lib/html/pipeline/toc_filter.rb +28 -0
  27. data/lib/html/pipeline/version.rb +5 -0
  28. data/test/html/pipeline/autolink_filter_test.rb +22 -0
  29. data/test/html/pipeline/camo_filter_test.rb +47 -0
  30. data/test/html/pipeline/emoji_filter_test.rb +18 -0
  31. data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
  32. data/test/html/pipeline/markdown_filter_test.rb +101 -0
  33. data/test/html/pipeline/mention_filter_test.rb +158 -0
  34. data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
  35. data/test/html/pipeline/sanitization_filter_test.rb +47 -0
  36. data/test/html/pipeline/toc_filter_test.rb +47 -0
  37. data/test/test_helper.rb +38 -0
  38. metadata +214 -0
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ bin/*
19
+ vendor/gems
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+
3
+ before_install:
4
+ - sudo apt-get update -qq
5
+ - sudo apt-get install -qq libicu-dev
6
+
7
+ script: "bundle exec rake"
8
+
9
+ rvm:
10
+ - 1.8.7
11
+ - 1.9.2
12
+ - 1.9.3
13
+ - ree
@@ -0,0 +1,16 @@
1
+ # CHANGELOG
2
+
3
+ ## 0.0.6
4
+
5
+ * don't mutate markdown strings: jakedouglas #32
6
+
7
+ ## 0.0.5
8
+
9
+ * fix li xss vulnerability in sanitization filter: vmg #31
10
+ * gemspec cleanup: nbibler #23, jbarnette #24
11
+ * doc updates: jch #16, pborreli #17, wickedshimmy #18, benubois #19, blackerby #21
12
+ * loosen gemoji dependency: josh #15
13
+
14
+ ## 0.0.4
15
+
16
+ * initial public release
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in html-pipeline-no-charlock.gemspec
4
+ gemspec :name => 'html-pipeline-no-charlock'
5
+
6
+ group :development do
7
+ gem 'bundler'
8
+ gem 'rake'
9
+ end
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 GitHub Inc. and Jerry Cheung
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,221 @@
1
+ # HTML::Pipeline [![Build Status](https://secure.travis-ci.org/jch/html-pipeline.png)](http://travis-ci.org/jch/html-pipeline)
2
+
3
+ GitHub HTML processing filters and utilities. This module includes a small
4
+ framework for defining DOM based content filters and applying them to user
5
+ provided content.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'html-pipeline'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ ```sh
18
+ $ bundle
19
+ ```
20
+
21
+ Or install it yourself as:
22
+
23
+ ```sh
24
+ $ gem install html-pipeline
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ This library provides a handful of chainable HTML filters to transform user
30
+ content into markup. A filter takes an HTML string or
31
+ `Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then
32
+ outputs the result.
33
+
34
+ For example, to transform Markdown source into Markdown HTML:
35
+
36
+ ```ruby
37
+ require 'html/pipeline'
38
+
39
+ filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!")
40
+ filter.call
41
+ ```
42
+
43
+ Filters can be combined into a pipeline which causes each filter to hand its
44
+ output to the next filter's input. So if you wanted to have content be
45
+ filtered through Markdown and be syntax highlighted, you can create the
46
+ following pipeline:
47
+
48
+ ```ruby
49
+ pipeline = HTML::Pipeline.new [
50
+ HTML::Pipeline::MarkdownFilter,
51
+ HTML::Pipeline::SyntaxHighlightFilter
52
+ ]
53
+ result = pipeline.call <<-CODE
54
+ This is *great*:
55
+
56
+ ``` ruby
57
+ some_code(:first)
58
+ ```
59
+
60
+ CODE
61
+ result[:output].to_s
62
+ ```
63
+
64
+ Prints:
65
+
66
+ ```html
67
+ <p>This is <em>great</em>:</p>
68
+
69
+ <div class="highlight">
70
+ <pre><span class="n">some_code</span><span class="p">(</span><span class="ss">:first</span><span class="p">)</span>
71
+ </pre>
72
+ </div>
73
+ ```
74
+
75
+ Some filters take an optional **context** and/or **result** hash. These are
76
+ used to pass around arguments and metadata between filters in a pipeline. For
77
+ example, if you don't want to use GitHub formatted Markdown, you can
78
+ pass an option in the context hash:
79
+
80
+ ```ruby
81
+ filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false)
82
+ filter.call
83
+ ```
84
+
85
+ ## Filters
86
+
87
+ * `MentionFilter` - replace `@user` mentions with links
88
+ * `AutolinkFilter` - auto_linking urls in HTML
89
+ * `CamoFilter` - replace http image urls with [camo-fied](https://github.com/atmos/camo) https versions
90
+ * `EmailReplyFilter` - util filter for working with emails
91
+ * `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
92
+ * `HttpsFilter` - HTML Filter for replacing http github urls with https versions.
93
+ * `ImageMaxWidthFilter` - link to full size image for large images
94
+ * `MarkdownFilter` - convert markdown to html
95
+ * `PlainTextInputFilter` - html escape text and wrap the result in a div
96
+ * `SanitizationFilter` - whitelist sanitize user markup
97
+ * `SyntaxHighlightFilter` - code syntax highlighter with [linguist](https://github.com/github/linguist)
98
+ * `TextileFilter` - convert textile to html
99
+ * `TableOfContentsFilter` - anchor headings with name attributes
100
+
101
+ ## Examples
102
+
103
+ We define different pipelines for different parts of our app. Here are a few
104
+ paraphrased snippets to get you started:
105
+
106
+ ```ruby
107
+ # The context hash is how you pass options between different filters.
108
+ # See individual filter source for explanation of options.
109
+ context = {
110
+ :asset_root => "http://your-domain.com/where/your/images/live/icons",
111
+ :base_url => "http://your-domain.com"
112
+ }
113
+
114
+ # Pipeline providing sanitization and image hijacking but no mention
115
+ # related features.
116
+ SimplePipeline = Pipeline.new [
117
+ SanitizationFilter,
118
+ TableOfContentsFilter, # add 'name' anchors to all headers
119
+ CamoFilter,
120
+ ImageMaxWidthFilter,
121
+ SyntaxHighlightFilter,
122
+ EmojiFilter,
123
+ AutolinkFilter
124
+ ], context
125
+
126
+ # Pipeline used for user provided content on the web
127
+ MarkdownPipeline = Pipeline.new [
128
+ MarkdownFilter,
129
+ SanitizationFilter,
130
+ CamoFilter,
131
+ ImageMaxWidthFilter,
132
+ HttpsFilter,
133
+ MentionFilter,
134
+ EmojiFilter,
135
+ SyntaxHighlightFilter
136
+ ], context.merge(:gfm => true) # enable github formatted markdown
137
+
138
+
139
+ # Define a pipeline based on another pipeline's filters
140
+ NonGFMMarkdownPipeline = Pipeline.new(MarkdownPipeline.filters,
141
+ context.merge(:gfm => false))
142
+
143
+ # Pipelines aren't limited to the web. You can use them for email
144
+ # processing also.
145
+ HtmlEmailPipeline = Pipeline.new [
146
+ ImageMaxWidthFilter
147
+ ], {}
148
+
149
+ # Just emoji.
150
+ EmojiPipeline = Pipeline.new [
151
+ PlainTextInputFilter,
152
+ EmojiFilter
153
+ ], context
154
+ ```
155
+
156
+ ## Extending
157
+ To write a custom filter, you need a class with a `call` method that inherits
158
+ from `HTML::Pipeline::Filter`.
159
+
160
+ For example this filter adds a base url to images that are root relative:
161
+
162
+ ```ruby
163
+ require 'uri'
164
+
165
+ class RootRelativeFilter < HTML::Pipeline::Filter
166
+
167
+ def call
168
+ doc.search("img").each do |img|
169
+ next if img['src'].nil?
170
+ src = img['src'].strip
171
+ if src.start_with? '/'
172
+ img["src"] = URI.join(context[:base_url], src).to_s
173
+ end
174
+ end
175
+ doc
176
+ end
177
+
178
+ end
179
+ ```
180
+
181
+ Now this filter can be used in a pipeline:
182
+
183
+ ```ruby
184
+ Pipeline.new [ RootRelativeFilter ], { :base_url => 'http://somehost.com' }
185
+ ```
186
+
187
+ ## Development
188
+
189
+ To see what has changed in recent versions, see the [CHANGELOG](https://github.com/jch/html-pipeline/blob/master/CHANGELOG.md).
190
+
191
+ ```sh
192
+ bundle
193
+ rake test
194
+ ```
195
+
196
+ ## Contributing
197
+
198
+ 1. [Fork it](https://help.github.com/articles/fork-a-repo)
199
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
200
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
201
+ 4. Push to the branch (`git push origin my-new-feature`)
202
+ 5. Create new [Pull Request](https://help.github.com/articles/using-pull-requests)
203
+
204
+
205
+ ## TODO
206
+
207
+ * test whether emoji filter works on heroku
208
+ * test whether nokogiri monkey patch is still necessary
209
+
210
+ ## Contributors
211
+
212
+ * [Aman Gupta](mailto:aman@tmm1.net)
213
+ * [Jake Boxer](mailto:jake@github.com)
214
+ * [Joshua Peek](mailto:josh@joshpeek.com)
215
+ * [Kyle Neath](mailto:kneath@gmail.com)
216
+ * [Rob Sanheim](mailto:rsanheim@gmail.com)
217
+ * [Simon Rozet](mailto:simon@rozet.name)
218
+ * [Vicent Martí](mailto:tanoku@gmail.com)
219
+ * [Risk :danger: Olson](mailto:technoweenie@gmail.com)
220
+
221
+ Project is a member of the [OSS Manifesto](http://ossmanifesto.org/).
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env rake
2
+ require 'bundler/gem_helper'
3
+ Bundler::GemHelper.install_tasks :name => 'html-pipeline-no-charlock'
4
+
5
+ require 'rake/testtask'
6
+
7
+ Rake::TestTask.new do |t|
8
+ t.libs << "test"
9
+ t.test_files = FileList['test/**/*_test.rb']
10
+ t.verbose = true
11
+ end
12
+
13
+ task :default => :test
@@ -0,0 +1,25 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path("../lib/html/pipeline/version", __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.name = "html-pipeline-no-charlock"
6
+ gem.version = HTML::Pipeline::VERSION
7
+ gem.license = "MIT"
8
+ gem.authors = ["Ryan Tomayko", "Jerry Cheung"]
9
+ gem.email = ["ryan@github.com", "jerry@github.com"]
10
+ gem.description = %q{GitHub HTML processing filters and utilities. This version remove dependencies on charlock_holmes to make it easy be installed on Windows}
11
+ gem.summary = %q{Helpers for processing content through a chain of filters}
12
+ gem.homepage = "https://github.com/doitian/html-pipeline"
13
+
14
+ gem.files = `git ls-files`.split $/
15
+ gem.test_files = gem.files.grep(%r{^test})
16
+ gem.require_paths = ["lib"]
17
+
18
+ gem.add_dependency "gemoji", "~> 1.0"
19
+ gem.add_dependency "nokogiri", "~> 1.4"
20
+ gem.add_dependency "github-markdown", "~> 0.5"
21
+ gem.add_dependency "sanitize", "~> 2.0"
22
+ gem.add_dependency "rinku", "~> 1.7"
23
+ gem.add_dependency "escape_utils", "~> 0.2"
24
+ gem.add_dependency "activesupport", ">= 2"
25
+ end
@@ -0,0 +1,26 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path("../lib/html/pipeline/version", __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.name = "html-pipeline"
6
+ gem.version = HTML::Pipeline::VERSION
7
+ gem.license = "MIT"
8
+ gem.authors = ["Ryan Tomayko", "Jerry Cheung"]
9
+ gem.email = ["ryan@github.com", "jerry@github.com"]
10
+ gem.description = %q{GitHub HTML processing filters and utilities}
11
+ gem.summary = %q{Helpers for processing content through a chain of filters}
12
+ gem.homepage = "https://github.com/jch/html-pipeline"
13
+
14
+ gem.files = `git ls-files`.split $/
15
+ gem.test_files = gem.files.grep(%r{^test})
16
+ gem.require_paths = ["lib"]
17
+
18
+ gem.add_dependency "gemoji", "~> 1.0"
19
+ gem.add_dependency "nokogiri", "~> 1.4"
20
+ gem.add_dependency "github-markdown", "~> 0.5"
21
+ gem.add_dependency "sanitize", "~> 2.0"
22
+ gem.add_dependency "github-linguist", "~> 2.1"
23
+ gem.add_dependency "rinku", "~> 1.7"
24
+ gem.add_dependency "escape_utils", "~> 0.2"
25
+ gem.add_dependency "activesupport", ">= 2"
26
+ end
@@ -0,0 +1,130 @@
1
+ require "nokogiri"
2
+ require "active_support/xml_mini/nokogiri" # convert Documents to hashes
3
+ require "escape_utils"
4
+
5
+ module HTML
6
+ # GitHub HTML processing filters and utilities. This module includes a small
7
+ # framework for defining DOM based content filters and applying them to user
8
+ # provided content.
9
+ #
10
+ # See HTML::Pipeline::Filter for information on building filters.
11
+ #
12
+ # Construct a Pipeline for running multiple HTML filters. A pipeline is created once
13
+ # with one to many filters, and it then can be `call`ed many times over the course
14
+ # of its lifetime with input.
15
+ #
16
+ # filters - Array of Filter objects. Each must respond to call(doc,
17
+ # context, result) and return the modified DocumentFragment or a
18
+ # String containing HTML markup. Filters are performed in the
19
+ # order provided.
20
+ # default_context - The default context hash. Values specified here will be merged
21
+ # into values from each individual pipeline run. Can NOT be
22
+ # nil. Default: empty Hash.
23
+ # result_class - The default Class of the result object for individual
24
+ # calls. Default: Hash. Protip: Pass in a Struct to get
25
+ # some semblance of type safety.
26
+ class Pipeline
27
+ autoload :VERSION, 'html/pipeline/version'
28
+ autoload :Pipeline, 'html/pipeline/pipeline'
29
+ autoload :Filter, 'html/pipeline/filter'
30
+ autoload :BodyContent, 'html/pipeline/body_content'
31
+ autoload :AutolinkFilter, 'html/pipeline/autolink_filter'
32
+ autoload :CamoFilter, 'html/pipeline/camo_filter'
33
+ autoload :EmailReplyFilter, 'html/pipeline/email_reply_filter'
34
+ autoload :EmojiFilter, 'html/pipeline/emoji_filter'
35
+ autoload :HttpsFilter, 'html/pipeline/https_filter'
36
+ autoload :ImageMaxWidthFilter, 'html/pipeline/image_max_width_filter'
37
+ autoload :MarkdownFilter, 'html/pipeline/markdown_filter'
38
+ autoload :MentionFilter, 'html/pipeline/@mention_filter'
39
+ autoload :PlainTextInputFilter, 'html/pipeline/plain_text_input_filter'
40
+ autoload :SanitizationFilter, 'html/pipeline/sanitization_filter'
41
+ autoload :SyntaxHighlightFilter, 'html/pipeline/syntax_highlight_filter'
42
+ autoload :TextileFilter, 'html/pipeline/textile_filter'
43
+ autoload :TableOfContentsFilter, 'html/pipeline/toc_filter'
44
+ autoload :TextFilter, 'html/pipeline/text_filter'
45
+
46
+ # Our DOM implementation.
47
+ DocumentFragment = Nokogiri::HTML::DocumentFragment
48
+
49
+ # Parse a String into a DocumentFragment object. When a DocumentFragment is
50
+ # provided, return it verbatim.
51
+ def self.parse(document_or_html)
52
+ document_or_html ||= ''
53
+ if document_or_html.is_a?(String)
54
+ DocumentFragment.parse(document_or_html)
55
+ else
56
+ document_or_html
57
+ end
58
+ end
59
+
60
+ # Public: Returns an Array of Filter objects for this Pipeline.
61
+ attr_reader :filters
62
+
63
+ def initialize(filters, default_context = {}, result_class = nil)
64
+ raise ArgumentError, "default_context cannot be nil" if default_context.nil?
65
+ @filters = filters.flatten.freeze
66
+ @default_context = default_context.freeze
67
+ @result_class = result_class || Hash
68
+ end
69
+
70
+ # Apply all filters in the pipeline to the given HTML.
71
+ #
72
+ # html - A String containing HTML or a DocumentFragment object.
73
+ # context - The context hash passed to each filter. See the Filter docs
74
+ # for more info on possible values. This object MUST NOT be modified
75
+ # in place by filters. Use the Result for passing state back.
76
+ # result - The result Hash passed to each filter for modification. This
77
+ # is where Filters store extracted information from the content.
78
+ #
79
+ # Returns the result Hash after being filtered by this Pipeline. Contains an
80
+ # :output key with the DocumentFragment or String HTML markup based on the
81
+ # output of the last filter in the pipeline.
82
+ def call(html, context = {}, result = nil)
83
+ context = @default_context.merge(context)
84
+ context = context.freeze
85
+ result ||= @result_class.new
86
+ result[:output] = @filters.inject(html) { |doc, filter| filter.call(doc, context, result) }
87
+ result
88
+ end
89
+
90
+ # Like call but guarantee the value returned is a DocumentFragment.
91
+ # Pipelines may return a DocumentFragment or a String. Callers that need a
92
+ # DocumentFragment should use this method.
93
+ def to_document(input, context = {}, result = nil)
94
+ result = call(input, context, result)
95
+ HTML::Pipeline.parse(result[:output])
96
+ end
97
+
98
+ # Like call but guarantee the value returned is a string of HTML markup.
99
+ def to_html(input, context = {}, result = nil)
100
+ result = call(input, context, result = nil)
101
+ output = result[:output]
102
+ if output.respond_to?(:to_html)
103
+ output.to_html
104
+ else
105
+ output.to_s
106
+ end
107
+ end
108
+ end
109
+ end
110
+
111
+ # XXX nokogiri monkey patches
112
+ class Nokogiri::XML::Node
113
+ # Work around an issue with utf-8 encoded data being erroneously converted to
114
+ # ... some other encoding when replacing text nodes. See 'utf-8 output 2' in
115
+ # user_content_test.rb for details.
116
+ def replace_with_encoding_fix(replacement)
117
+ if replacement.respond_to?(:to_str)
118
+ replacement = document.fragment("<div>#{replacement}</div>").children.first.children
119
+ end
120
+ replace_without_encoding_fix(replacement)
121
+ end
122
+
123
+ alias_method :replace_without_encoding_fix, :replace
124
+ alias_method :replace, :replace_with_encoding_fix
125
+
126
+ def swap(replacement)
127
+ replace(replacement)
128
+ self
129
+ end
130
+ end