geothird-html-pipeline 0.0.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.travis.yml +13 -0
- data/CHANGELOG.md +43 -0
- data/Gemfile +9 -0
- data/LICENSE +22 -0
- data/README.md +274 -0
- data/Rakefile +11 -0
- data/bin/html-pipeline +80 -0
- data/geothird-html-pipeline.gemspec +27 -0
- data/lib/html/pipeline.rb +198 -0
- data/lib/html/pipeline/@mention_filter.rb +121 -0
- data/lib/html/pipeline/absolute_source_filter.rb +48 -0
- data/lib/html/pipeline/autolink_filter.rb +22 -0
- data/lib/html/pipeline/body_content.rb +42 -0
- data/lib/html/pipeline/camo_filter.rb +70 -0
- data/lib/html/pipeline/email_reply_filter.rb +56 -0
- data/lib/html/pipeline/emoji_filter.rb +54 -0
- data/lib/html/pipeline/filter.rb +178 -0
- data/lib/html/pipeline/https_filter.rb +13 -0
- data/lib/html/pipeline/image_max_width_filter.rb +37 -0
- data/lib/html/pipeline/markdown_filter.rb +29 -0
- data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
- data/lib/html/pipeline/sanitization_filter.rb +105 -0
- data/lib/html/pipeline/syntax_highlight_filter.rb +33 -0
- data/lib/html/pipeline/text_filter.rb +14 -0
- data/lib/html/pipeline/textile_filter.rb +21 -0
- data/lib/html/pipeline/toc_filter.rb +28 -0
- data/lib/html/pipeline/version.rb +5 -0
- data/test/helpers/mocked_instrumentation_service.rb +17 -0
- data/test/html/pipeline/absolute_source_filter_test.rb +56 -0
- data/test/html/pipeline/autolink_filter_test.rb +22 -0
- data/test/html/pipeline/camo_filter_test.rb +47 -0
- data/test/html/pipeline/emoji_filter_test.rb +18 -0
- data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
- data/test/html/pipeline/markdown_filter_test.rb +101 -0
- data/test/html/pipeline/mention_filter_test.rb +156 -0
- data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
- data/test/html/pipeline/sanitization_filter_test.rb +47 -0
- data/test/html/pipeline/toc_filter_test.rb +47 -0
- data/test/html/pipeline_test.rb +74 -0
- data/test/test_helper.rb +38 -0
- metadata +213 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5345ec14f85df8e82bc3aca081c65e4b94aa7878
|
4
|
+
data.tar.gz: d6bdcd46d8baad4c6bda04d5f1ecedaedde72aa0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f5d4cf9bc9dca29a4330cc8000889f64e9ea1fffb0fef0fec01f50d20f8870f60961f0e1bb997d210f71c6a4619cc915b993de6689e2b9ab4741192ebfaf9f41
|
7
|
+
data.tar.gz: 6688675d57544c1004330867842bd3062970c74322e5fe4800bb37f30466c83d5d06588c859418503520dfc18b3ab0f8146e9d3904dce4a4d7b6a65c8c04dbf0
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# CHANGELOG
|
2
|
+
|
3
|
+
## 0.0.12
|
4
|
+
|
5
|
+
* add additional payload information for instrumentation mtodd #46
|
6
|
+
* generate and link to gem docs in README
|
7
|
+
|
8
|
+
## 0.0.11
|
9
|
+
|
10
|
+
* add instrumentation support. readme cleanup mtodd #45
|
11
|
+
|
12
|
+
## 0.0.10
|
13
|
+
|
14
|
+
* add bin/html-pipeline util indirect #44
|
15
|
+
* add result[:mentioned_usernames] for MentionFilter fachen #42
|
16
|
+
|
17
|
+
## 0.0.9
|
18
|
+
|
19
|
+
* bump escape_utils ~> 0.3, github-linguist ~> 2.6.2 brianmario #41
|
20
|
+
* remove nokogiri monkey patch for ruby >= 1.9 defunkt #40
|
21
|
+
|
22
|
+
## 0.0.8
|
23
|
+
|
24
|
+
* raise LoadError instead of printing to stderr if linguist is missing. gjtorikian #36
|
25
|
+
|
26
|
+
## 0.0.7
|
27
|
+
|
28
|
+
* optionally require github-linguist chrislloyd #33
|
29
|
+
|
30
|
+
## 0.0.6
|
31
|
+
|
32
|
+
* don't mutate markdown strings: jakedouglas #32
|
33
|
+
|
34
|
+
## 0.0.5
|
35
|
+
|
36
|
+
* fix li xss vulnerability in sanitization filter: vmg #31
|
37
|
+
* gemspec cleanup: nbibler #23, jbarnette #24
|
38
|
+
* doc updates: jch #16, pborreli #17, wickedshimmy #18, benubois #19, blackerby #21
|
39
|
+
* loosen gemoji dependency: josh #15
|
40
|
+
|
41
|
+
## 0.0.4
|
42
|
+
|
43
|
+
* initial public release
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 GitHub Inc. and Jerry Cheung
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,274 @@
|
|
1
|
+
# HTML::Pipeline [![Build Status](https://secure.travis-ci.org/jch/html-pipeline.png)](http://travis-ci.org/jch/html-pipeline)
|
2
|
+
|
3
|
+
GitHub HTML processing filters and utilities. This module includes a small
|
4
|
+
framework for defining DOM based content filters and applying them to user
|
5
|
+
provided content. Read an introduction about this project in
|
6
|
+
[this blog post](https://github.com/blog/1311-html-pipeline-chainable-content-filters).
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add this line to your application's Gemfile:
|
11
|
+
|
12
|
+
```ruby
|
13
|
+
gem 'html-pipeline'
|
14
|
+
```
|
15
|
+
|
16
|
+
And then execute:
|
17
|
+
|
18
|
+
```sh
|
19
|
+
$ bundle
|
20
|
+
```
|
21
|
+
|
22
|
+
Or install it yourself as:
|
23
|
+
|
24
|
+
```sh
|
25
|
+
$ gem install html-pipeline
|
26
|
+
```
|
27
|
+
|
28
|
+
## Usage
|
29
|
+
|
30
|
+
This library provides a handful of chainable HTML filters to transform user
|
31
|
+
content into markup. A filter takes an HTML string or
|
32
|
+
`Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then
|
33
|
+
outputs the result.
|
34
|
+
|
35
|
+
For example, to transform Markdown source into Markdown HTML:
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
require 'html/pipeline'
|
39
|
+
|
40
|
+
filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!")
|
41
|
+
filter.call
|
42
|
+
```
|
43
|
+
|
44
|
+
Filters can be combined into a pipeline which causes each filter to hand its
|
45
|
+
output to the next filter's input. So if you wanted to have content be
|
46
|
+
filtered through Markdown and be syntax highlighted, you can create the
|
47
|
+
following pipeline:
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
pipeline = HTML::Pipeline.new [
|
51
|
+
HTML::Pipeline::MarkdownFilter,
|
52
|
+
HTML::Pipeline::SyntaxHighlightFilter
|
53
|
+
]
|
54
|
+
result = pipeline.call <<-CODE
|
55
|
+
This is *great*:
|
56
|
+
|
57
|
+
some_code(:first)
|
58
|
+
|
59
|
+
CODE
|
60
|
+
result[:output].to_s
|
61
|
+
```
|
62
|
+
|
63
|
+
Prints:
|
64
|
+
|
65
|
+
```html
|
66
|
+
<p>This is <em>great</em>:</p>
|
67
|
+
|
68
|
+
<div class="highlight">
|
69
|
+
<pre><span class="n">some_code</span><span class="p">(</span><span class="ss">:first</span><span class="p">)</span>
|
70
|
+
</pre>
|
71
|
+
</div>
|
72
|
+
```
|
73
|
+
|
74
|
+
Some filters take an optional **context** and/or **result** hash. These are
|
75
|
+
used to pass around arguments and metadata between filters in a pipeline. For
|
76
|
+
example, if you don't want to use GitHub formatted Markdown, you can
|
77
|
+
pass an option in the context hash:
|
78
|
+
|
79
|
+
```ruby
|
80
|
+
filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false)
|
81
|
+
filter.call
|
82
|
+
```
|
83
|
+
|
84
|
+
## Filters
|
85
|
+
|
86
|
+
* `MentionFilter` - replace `@user` mentions with links
|
87
|
+
* `AbsoluteSourceFilter` - replace relative image urls with fully qualified versions
|
88
|
+
* `AutolinkFilter` - auto_linking urls in HTML
|
89
|
+
* `CamoFilter` - replace http image urls with [camo-fied](https://github.com/atmos/camo) https versions
|
90
|
+
* `EmailReplyFilter` - util filter for working with emails
|
91
|
+
* `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
|
92
|
+
* `HttpsFilter` - HTML Filter for replacing http github urls with https versions.
|
93
|
+
* `ImageMaxWidthFilter` - link to full size image for large images
|
94
|
+
* `MarkdownFilter` - convert markdown to html
|
95
|
+
* `PlainTextInputFilter` - html escape text and wrap the result in a div
|
96
|
+
* `SanitizationFilter` - whitelist sanitize user markup
|
97
|
+
* `SyntaxHighlightFilter` - [code syntax highlighter](#syntax-highlighting)
|
98
|
+
* `TextileFilter` - convert textile to html
|
99
|
+
* `TableOfContentsFilter` - anchor headings with name attributes
|
100
|
+
|
101
|
+
## Syntax highlighting
|
102
|
+
|
103
|
+
`SyntaxHighlightFilter` uses [github-linguist](https://github.com/github/linguist)
|
104
|
+
to detect and highlight languages. It isn't included as a dependency by default
|
105
|
+
because it's a large dependency and
|
106
|
+
[a hassle to build on heroku](https://github.com/jch/html-pipeline/issues/33).
|
107
|
+
To use the filter, add the following to your Gemfile:
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
gem 'github-linguist'
|
111
|
+
```
|
112
|
+
|
113
|
+
## Examples
|
114
|
+
|
115
|
+
We define different pipelines for different parts of our app. Here are a few
|
116
|
+
paraphrased snippets to get you started:
|
117
|
+
|
118
|
+
```ruby
|
119
|
+
# The context hash is how you pass options between different filters.
|
120
|
+
# See individual filter source for explanation of options.
|
121
|
+
context = {
|
122
|
+
:asset_root => "http://your-domain.com/where/your/images/live/icons",
|
123
|
+
:base_url => "http://your-domain.com"
|
124
|
+
}
|
125
|
+
|
126
|
+
# Pipeline providing sanitization and image hijacking but no mention
|
127
|
+
# related features.
|
128
|
+
SimplePipeline = Pipeline.new [
|
129
|
+
SanitizationFilter,
|
130
|
+
TableOfContentsFilter, # add 'name' anchors to all headers
|
131
|
+
CamoFilter,
|
132
|
+
ImageMaxWidthFilter,
|
133
|
+
SyntaxHighlightFilter,
|
134
|
+
EmojiFilter,
|
135
|
+
AutolinkFilter
|
136
|
+
], context
|
137
|
+
|
138
|
+
# Pipeline used for user provided content on the web
|
139
|
+
MarkdownPipeline = Pipeline.new [
|
140
|
+
MarkdownFilter,
|
141
|
+
SanitizationFilter,
|
142
|
+
CamoFilter,
|
143
|
+
ImageMaxWidthFilter,
|
144
|
+
HttpsFilter,
|
145
|
+
MentionFilter,
|
146
|
+
EmojiFilter,
|
147
|
+
SyntaxHighlightFilter
|
148
|
+
], context.merge(:gfm => true) # enable github formatted markdown
|
149
|
+
|
150
|
+
|
151
|
+
# Define a pipeline based on another pipeline's filters
|
152
|
+
NonGFMMarkdownPipeline = Pipeline.new(MarkdownPipeline.filters,
|
153
|
+
context.merge(:gfm => false))
|
154
|
+
|
155
|
+
# Pipelines aren't limited to the web. You can use them for email
|
156
|
+
# processing also.
|
157
|
+
HtmlEmailPipeline = Pipeline.new [
|
158
|
+
ImageMaxWidthFilter
|
159
|
+
], {}
|
160
|
+
|
161
|
+
# Just emoji.
|
162
|
+
EmojiPipeline = Pipeline.new [
|
163
|
+
HTMLInputFilter,
|
164
|
+
EmojiFilter
|
165
|
+
], context
|
166
|
+
```
|
167
|
+
|
168
|
+
## Extending
|
169
|
+
To write a custom filter, you need a class that defines `call` and inherits
|
170
|
+
from `HTML::Pipeline::Filter`.
|
171
|
+
|
172
|
+
For example this filter adds a base url to images that are root relative:
|
173
|
+
|
174
|
+
```ruby
|
175
|
+
require 'uri'
|
176
|
+
|
177
|
+
class RootRelativeFilter < HTML::Pipeline::Filter
|
178
|
+
|
179
|
+
def call
|
180
|
+
doc.search("img").each do |img|
|
181
|
+
next if img['src'].nil?
|
182
|
+
src = img['src'].strip
|
183
|
+
if src.start_with? '/'
|
184
|
+
img["src"] = URI.join(context[:base_url], src).to_s
|
185
|
+
end
|
186
|
+
end
|
187
|
+
doc
|
188
|
+
end
|
189
|
+
|
190
|
+
end
|
191
|
+
```
|
192
|
+
|
193
|
+
Now this filter can be used in a pipeline:
|
194
|
+
|
195
|
+
```ruby
|
196
|
+
Pipeline.new [ RootRelativeFilter ], { :base_url => 'http://somehost.com' }
|
197
|
+
```
|
198
|
+
|
199
|
+
## Instrumenting
|
200
|
+
|
201
|
+
Filters and Pipelines can be set up to be instrumented when called. The pipeline
|
202
|
+
must be set up with an [ActiveSupport::Notifications]
|
203
|
+
(http://api.rubyonrails.org/classes/ActiveSupport/Notifications.html)
|
204
|
+
compatible service object and a name. New pipeline objects will default to the
|
205
|
+
`HTML::Pipeline.default_instrumentation_service` object.
|
206
|
+
|
207
|
+
``` ruby
|
208
|
+
# the AS::Notifications-compatible service object
|
209
|
+
service = ActiveSupport::Notifications
|
210
|
+
|
211
|
+
# instrument a specific pipeline
|
212
|
+
pipeline = HTML::Pipeline.new [MarkdownFilter], context
|
213
|
+
pipeline.setup_instrumentation "MarkdownPipeline", service
|
214
|
+
|
215
|
+
# or set default instrumentation service for all new pipelines
|
216
|
+
HTML::Pipeline.default_instrumentation_service = service
|
217
|
+
pipeline = HTML::Pipeline.new [MarkdownFilter], context
|
218
|
+
pipeline.setup_instrumentation "MarkdownPipeline"
|
219
|
+
```
|
220
|
+
|
221
|
+
Filters are instrumented when they are run through the pipeline. A
|
222
|
+
`call_filter.html_pipeline` event is published once the filter finishes. The
|
223
|
+
`payload` should include the `filter` name. Each filter will trigger its own
|
224
|
+
instrumentation call.
|
225
|
+
|
226
|
+
``` ruby
|
227
|
+
service.subscribe "call_filter.html_pipeline" do |event, start, ending, transaction_id, payload|
|
228
|
+
payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation`
|
229
|
+
payload[:filter] #=> "MarkdownFilter"
|
230
|
+
payload[:context] #=> context Hash
|
231
|
+
payload[:result] #=> instance of result class
|
232
|
+
payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment
|
233
|
+
end
|
234
|
+
```
|
235
|
+
|
236
|
+
The full pipeline is also instrumented:
|
237
|
+
|
238
|
+
``` ruby
|
239
|
+
service.subscribe "call_pipeline.html_pipeline" do |event, start, ending, transaction_id, payload|
|
240
|
+
payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation`
|
241
|
+
payload[:filters] #=> ["MarkdownFilter"]
|
242
|
+
payload[:doc] #=> HTML String or Nokogiri::DocumentFragment
|
243
|
+
payload[:context] #=> context Hash
|
244
|
+
payload[:result] #=> instance of result class
|
245
|
+
payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment
|
246
|
+
end
|
247
|
+
```
|
248
|
+
|
249
|
+
## Documentation
|
250
|
+
|
251
|
+
Full reference documentation can be [found here](http://rubydoc.info/gems/html-pipeline/frames).
|
252
|
+
|
253
|
+
## Development
|
254
|
+
|
255
|
+
To see what has changed in recent versions, see the [CHANGELOG](https://github.com/jch/html-pipeline/blob/master/CHANGELOG.md).
|
256
|
+
|
257
|
+
```sh
|
258
|
+
bundle
|
259
|
+
rake test
|
260
|
+
```
|
261
|
+
|
262
|
+
## Contributing
|
263
|
+
|
264
|
+
1. [Fork it](https://help.github.com/articles/fork-a-repo)
|
265
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
266
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
267
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
268
|
+
5. Create a new [Pull Request](https://help.github.com/articles/using-pull-requests)
|
269
|
+
|
270
|
+
## Contributors
|
271
|
+
|
272
|
+
Thanks to all of [these contributors](https://github.com/jch/html-pipeline/graphs/contributors).
|
273
|
+
|
274
|
+
Project is a member of the [OSS Manifesto](http://ossmanifesto.org/).
|
data/Rakefile
ADDED
data/bin/html-pipeline
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'html/pipeline'
|
3
|
+
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
# Accept "help", too
|
7
|
+
ARGV.map!{|a| a == "help" ? "--help" : a }
|
8
|
+
|
9
|
+
OptionParser.new do |opts|
|
10
|
+
opts.banner = <<-HELP.gsub(/^ /, '')
|
11
|
+
Usage: html-pipeline [-h] [-f]
|
12
|
+
html-pipeline [FILTER [FILTER [...]]] < file.md
|
13
|
+
cat file.md | html-pipeline [FILTER [FILTER [...]]]
|
14
|
+
HELP
|
15
|
+
|
16
|
+
opts.separator "Options:"
|
17
|
+
|
18
|
+
opts.on("-f", "--filters", "List the available filters") do
|
19
|
+
filters = HTML::Pipeline.constants.grep(/\w+Filter$/).
|
20
|
+
map{|f| f.to_s.gsub(/Filter$/,'') }
|
21
|
+
|
22
|
+
# Text filter doesn't work, no call method
|
23
|
+
filters -= ["Text"]
|
24
|
+
|
25
|
+
abort <<-HELP.gsub(/^ /, '')
|
26
|
+
Available filters:
|
27
|
+
#{filters.join("\n ")}
|
28
|
+
HELP
|
29
|
+
end
|
30
|
+
end.parse!
|
31
|
+
|
32
|
+
# Default to a GitHub-ish pipeline
|
33
|
+
if ARGV.empty?
|
34
|
+
|
35
|
+
filters = [
|
36
|
+
HTML::Pipeline::MarkdownFilter,
|
37
|
+
HTML::Pipeline::SanitizationFilter,
|
38
|
+
HTML::Pipeline::ImageMaxWidthFilter,
|
39
|
+
HTML::Pipeline::EmojiFilter,
|
40
|
+
HTML::Pipeline::AutolinkFilter,
|
41
|
+
HTML::Pipeline::TableOfContentsFilter,
|
42
|
+
]
|
43
|
+
|
44
|
+
# Add syntax highlighting if linguist is present
|
45
|
+
begin
|
46
|
+
require 'linguist'
|
47
|
+
filters << HTML::Pipeline::SyntaxHighlightFilter
|
48
|
+
rescue LoadError
|
49
|
+
end
|
50
|
+
|
51
|
+
else
|
52
|
+
|
53
|
+
def filter_named(name)
|
54
|
+
case name
|
55
|
+
when "Text"
|
56
|
+
raise NameError # Text filter doesn't work, no call method
|
57
|
+
when "Textile"
|
58
|
+
require "RedCloth" # Textile filter doesn't require RedCloth
|
59
|
+
end
|
60
|
+
|
61
|
+
HTML::Pipeline.const_get("#{name}Filter")
|
62
|
+
rescue NameError => e
|
63
|
+
abort "Unknown filter '#{name}'. List filters with the -f option."
|
64
|
+
end
|
65
|
+
|
66
|
+
filters = []
|
67
|
+
until ARGV.empty?
|
68
|
+
name = ARGV.shift
|
69
|
+
filters << filter_named(name)
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
context = {
|
75
|
+
:asset_root => "/assets",
|
76
|
+
:base_url => "/",
|
77
|
+
:gfm => true
|
78
|
+
}
|
79
|
+
|
80
|
+
puts HTML::Pipeline.new(filters, context).call(ARGF.read)[:output]
|