geothird-html-pipeline 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/.travis.yml +13 -0
  4. data/CHANGELOG.md +43 -0
  5. data/Gemfile +9 -0
  6. data/LICENSE +22 -0
  7. data/README.md +274 -0
  8. data/Rakefile +11 -0
  9. data/bin/html-pipeline +80 -0
  10. data/geothird-html-pipeline.gemspec +27 -0
  11. data/lib/html/pipeline.rb +198 -0
  12. data/lib/html/pipeline/@mention_filter.rb +121 -0
  13. data/lib/html/pipeline/absolute_source_filter.rb +48 -0
  14. data/lib/html/pipeline/autolink_filter.rb +22 -0
  15. data/lib/html/pipeline/body_content.rb +42 -0
  16. data/lib/html/pipeline/camo_filter.rb +70 -0
  17. data/lib/html/pipeline/email_reply_filter.rb +56 -0
  18. data/lib/html/pipeline/emoji_filter.rb +54 -0
  19. data/lib/html/pipeline/filter.rb +178 -0
  20. data/lib/html/pipeline/https_filter.rb +13 -0
  21. data/lib/html/pipeline/image_max_width_filter.rb +37 -0
  22. data/lib/html/pipeline/markdown_filter.rb +29 -0
  23. data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
  24. data/lib/html/pipeline/sanitization_filter.rb +105 -0
  25. data/lib/html/pipeline/syntax_highlight_filter.rb +33 -0
  26. data/lib/html/pipeline/text_filter.rb +14 -0
  27. data/lib/html/pipeline/textile_filter.rb +21 -0
  28. data/lib/html/pipeline/toc_filter.rb +28 -0
  29. data/lib/html/pipeline/version.rb +5 -0
  30. data/test/helpers/mocked_instrumentation_service.rb +17 -0
  31. data/test/html/pipeline/absolute_source_filter_test.rb +56 -0
  32. data/test/html/pipeline/autolink_filter_test.rb +22 -0
  33. data/test/html/pipeline/camo_filter_test.rb +47 -0
  34. data/test/html/pipeline/emoji_filter_test.rb +18 -0
  35. data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
  36. data/test/html/pipeline/markdown_filter_test.rb +101 -0
  37. data/test/html/pipeline/mention_filter_test.rb +156 -0
  38. data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
  39. data/test/html/pipeline/sanitization_filter_test.rb +47 -0
  40. data/test/html/pipeline/toc_filter_test.rb +47 -0
  41. data/test/html/pipeline_test.rb +74 -0
  42. data/test/test_helper.rb +38 -0
  43. metadata +213 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5345ec14f85df8e82bc3aca081c65e4b94aa7878
4
+ data.tar.gz: d6bdcd46d8baad4c6bda04d5f1ecedaedde72aa0
5
+ SHA512:
6
+ metadata.gz: f5d4cf9bc9dca29a4330cc8000889f64e9ea1fffb0fef0fec01f50d20f8870f60961f0e1bb997d210f71c6a4619cc915b993de6689e2b9ab4741192ebfaf9f41
7
+ data.tar.gz: 6688675d57544c1004330867842bd3062970c74322e5fe4800bb37f30466c83d5d06588c859418503520dfc18b3ab0f8146e9d3904dce4a4d7b6a65c8c04dbf0
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ exec/*
19
+ vendor/gems
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+
3
+ before_install:
4
+ - sudo apt-get update -qq
5
+ - sudo apt-get install -qq libicu-dev
6
+
7
+ script: "bundle exec rake"
8
+
9
+ rvm:
10
+ - 1.8.7
11
+ - 1.9.2
12
+ - 1.9.3
13
+ - ree
@@ -0,0 +1,43 @@
1
+ # CHANGELOG
2
+
3
+ ## 0.0.12
4
+
5
+ * add additional payload information for instrumentation mtodd #46
6
+ * generate and link to gem docs in README
7
+
8
+ ## 0.0.11
9
+
10
+ * add instrumentation support. readme cleanup mtodd #45
11
+
12
+ ## 0.0.10
13
+
14
+ * add bin/html-pipeline util indirect #44
15
+ * add result[:mentioned_usernames] for MentionFilter fachen #42
16
+
17
+ ## 0.0.9
18
+
19
+ * bump escape_utils ~> 0.3, github-linguist ~> 2.6.2 brianmario #41
20
+ * remove nokogiri monkey patch for ruby >= 1.9 defunkt #40
21
+
22
+ ## 0.0.8
23
+
24
+ * raise LoadError instead of printing to stderr if linguist is missing. gjtorikian #36
25
+
26
+ ## 0.0.7
27
+
28
+ * optionally require github-linguist chrislloyd #33
29
+
30
+ ## 0.0.6
31
+
32
+ * don't mutate markdown strings: jakedouglas #32
33
+
34
+ ## 0.0.5
35
+
36
+ * fix li xss vulnerability in sanitization filter: vmg #31
37
+ * gemspec cleanup: nbibler #23, jbarnette #24
38
+ * doc updates: jch #16, pborreli #17, wickedshimmy #18, benubois #19, blackerby #21
39
+ * loosen gemoji dependency: josh #15
40
+
41
+ ## 0.0.4
42
+
43
+ * initial public release
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in html-pipeline.gemspec
4
+ gemspec
5
+
6
+ group :development do
7
+ gem 'bundler'
8
+ gem 'rake'
9
+ end
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 GitHub Inc. and Jerry Cheung
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,274 @@
1
+ # HTML::Pipeline [![Build Status](https://secure.travis-ci.org/jch/html-pipeline.png)](http://travis-ci.org/jch/html-pipeline)
2
+
3
+ GitHub HTML processing filters and utilities. This module includes a small
4
+ framework for defining DOM based content filters and applying them to user
5
+ provided content. Read an introduction about this project in
6
+ [this blog post](https://github.com/blog/1311-html-pipeline-chainable-content-filters).
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ ```ruby
13
+ gem 'html-pipeline'
14
+ ```
15
+
16
+ And then execute:
17
+
18
+ ```sh
19
+ $ bundle
20
+ ```
21
+
22
+ Or install it yourself as:
23
+
24
+ ```sh
25
+ $ gem install html-pipeline
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ This library provides a handful of chainable HTML filters to transform user
31
+ content into markup. A filter takes an HTML string or
32
+ `Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then
33
+ outputs the result.
34
+
35
+ For example, to transform Markdown source into Markdown HTML:
36
+
37
+ ```ruby
38
+ require 'html/pipeline'
39
+
40
+ filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!")
41
+ filter.call
42
+ ```
43
+
44
+ Filters can be combined into a pipeline which causes each filter to hand its
45
+ output to the next filter's input. So if you wanted to have content be
46
+ filtered through Markdown and be syntax highlighted, you can create the
47
+ following pipeline:
48
+
49
+ ```ruby
50
+ pipeline = HTML::Pipeline.new [
51
+ HTML::Pipeline::MarkdownFilter,
52
+ HTML::Pipeline::SyntaxHighlightFilter
53
+ ]
54
+ result = pipeline.call <<-CODE
55
+ This is *great*:
56
+
57
+ some_code(:first)
58
+
59
+ CODE
60
+ result[:output].to_s
61
+ ```
62
+
63
+ Prints:
64
+
65
+ ```html
66
+ <p>This is <em>great</em>:</p>
67
+
68
+ <div class="highlight">
69
+ <pre><span class="n">some_code</span><span class="p">(</span><span class="ss">:first</span><span class="p">)</span>
70
+ </pre>
71
+ </div>
72
+ ```
73
+
74
+ Some filters take an optional **context** and/or **result** hash. These are
75
+ used to pass around arguments and metadata between filters in a pipeline. For
76
+ example, if you don't want to use GitHub formatted Markdown, you can
77
+ pass an option in the context hash:
78
+
79
+ ```ruby
80
+ filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false)
81
+ filter.call
82
+ ```
83
+
84
+ ## Filters
85
+
86
+ * `MentionFilter` - replace `@user` mentions with links
87
+ * `AbsoluteSourceFilter` - replace relative image urls with fully qualified versions
88
+ * `AutolinkFilter` - automatically link URLs in HTML
89
+ * `CamoFilter` - replace http image urls with [camo-fied](https://github.com/atmos/camo) https versions
90
+ * `EmailReplyFilter` - util filter for working with emails
91
+ * `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
92
+ * `HttpsFilter` - HTML Filter for replacing http github urls with https versions.
93
+ * `ImageMaxWidthFilter` - link to full size image for large images
94
+ * `MarkdownFilter` - convert markdown to html
95
+ * `PlainTextInputFilter` - html escape text and wrap the result in a div
96
+ * `SanitizationFilter` - whitelist sanitize user markup
97
+ * `SyntaxHighlightFilter` - [code syntax highlighter](#syntax-highlighting)
98
+ * `TextileFilter` - convert textile to html
99
+ * `TableOfContentsFilter` - anchor headings with name attributes
100
+
101
+ ## Syntax highlighting
102
+
103
+ `SyntaxHighlightFilter` uses [github-linguist](https://github.com/github/linguist)
104
+ to detect and highlight languages. It isn't included as a dependency by default
105
+ because it's a large dependency and
106
+ [a hassle to build on heroku](https://github.com/jch/html-pipeline/issues/33).
107
+ To use the filter, add the following to your Gemfile:
108
+
109
+ ```ruby
110
+ gem 'github-linguist'
111
+ ```
112
+
113
+ ## Examples
114
+
115
+ We define different pipelines for different parts of our app. Here are a few
116
+ paraphrased snippets to get you started:
117
+
118
+ ```ruby
119
+ # The context hash is how you pass options between different filters.
120
+ # See individual filter source for explanation of options.
121
+ context = {
122
+ :asset_root => "http://your-domain.com/where/your/images/live/icons",
123
+ :base_url => "http://your-domain.com"
124
+ }
125
+
126
+ # Pipeline providing sanitization and image hijacking but no mention
127
+ # related features.
128
+ SimplePipeline = Pipeline.new [
129
+ SanitizationFilter,
130
+ TableOfContentsFilter, # add 'name' anchors to all headers
131
+ CamoFilter,
132
+ ImageMaxWidthFilter,
133
+ SyntaxHighlightFilter,
134
+ EmojiFilter,
135
+ AutolinkFilter
136
+ ], context
137
+
138
+ # Pipeline used for user provided content on the web
139
+ MarkdownPipeline = Pipeline.new [
140
+ MarkdownFilter,
141
+ SanitizationFilter,
142
+ CamoFilter,
143
+ ImageMaxWidthFilter,
144
+ HttpsFilter,
145
+ MentionFilter,
146
+ EmojiFilter,
147
+ SyntaxHighlightFilter
148
+ ], context.merge(:gfm => true) # enable github formatted markdown
149
+
150
+
151
+ # Define a pipeline based on another pipeline's filters
152
+ NonGFMMarkdownPipeline = Pipeline.new(MarkdownPipeline.filters,
153
+ context.merge(:gfm => false))
154
+
155
+ # Pipelines aren't limited to the web. You can use them for email
156
+ # processing also.
157
+ HtmlEmailPipeline = Pipeline.new [
158
+ ImageMaxWidthFilter
159
+ ], {}
160
+
161
+ # Just emoji.
162
+ EmojiPipeline = Pipeline.new [
163
+ HTMLInputFilter,
164
+ EmojiFilter
165
+ ], context
166
+ ```
167
+
168
+ ## Extending
169
+ To write a custom filter, you need a class that inherits from
170
+ `HTML::Pipeline::Filter` and defines a `call` method.
171
+
172
+ For example this filter adds a base url to images that are root relative:
173
+
174
+ ```ruby
175
+ require 'uri'
176
+
177
+ class RootRelativeFilter < HTML::Pipeline::Filter
178
+
179
+ def call
180
+ doc.search("img").each do |img|
181
+ next if img['src'].nil?
182
+ src = img['src'].strip
183
+ if src.start_with? '/'
184
+ img["src"] = URI.join(context[:base_url], src).to_s
185
+ end
186
+ end
187
+ doc
188
+ end
189
+
190
+ end
191
+ ```
192
+
193
+ Now this filter can be used in a pipeline:
194
+
195
+ ```ruby
196
+ Pipeline.new [ RootRelativeFilter ], { :base_url => 'http://somehost.com' }
197
+ ```
198
+
199
+ ## Instrumenting
200
+
201
+ Filters and Pipelines can be set up to be instrumented when called. The pipeline
202
+ must be set up with an
203
+ [ActiveSupport::Notifications](http://api.rubyonrails.org/classes/ActiveSupport/Notifications.html)
204
+ compatible service object and a name. New pipeline objects will default to the
205
+ `HTML::Pipeline.default_instrumentation_service` object.
206
+
207
+ ``` ruby
208
+ # the AS::Notifications-compatible service object
209
+ service = ActiveSupport::Notifications
210
+
211
+ # instrument a specific pipeline
212
+ pipeline = HTML::Pipeline.new [MarkdownFilter], context
213
+ pipeline.setup_instrumentation "MarkdownPipeline", service
214
+
215
+ # or set default instrumentation service for all new pipelines
216
+ HTML::Pipeline.default_instrumentation_service = service
217
+ pipeline = HTML::Pipeline.new [MarkdownFilter], context
218
+ pipeline.setup_instrumentation "MarkdownPipeline"
219
+ ```
220
+
221
+ Filters are instrumented when they are run through the pipeline. A
222
+ `call_filter.html_pipeline` event is published once the filter finishes. The
223
+ `payload` should include the `filter` name. Each filter will trigger its own
224
+ instrumentation call.
225
+
226
+ ``` ruby
227
+ service.subscribe "call_filter.html_pipeline" do |event, start, ending, transaction_id, payload|
228
+ payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation`
229
+ payload[:filter] #=> "MarkdownFilter"
230
+ payload[:context] #=> context Hash
231
+ payload[:result] #=> instance of result class
232
+ payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment
233
+ end
234
+ ```
235
+
236
+ The full pipeline is also instrumented:
237
+
238
+ ``` ruby
239
+ service.subscribe "call_pipeline.html_pipeline" do |event, start, ending, transaction_id, payload|
240
+ payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation`
241
+ payload[:filters] #=> ["MarkdownFilter"]
242
+ payload[:doc] #=> HTML String or Nokogiri::DocumentFragment
243
+ payload[:context] #=> context Hash
244
+ payload[:result] #=> instance of result class
245
+ payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment
246
+ end
247
+ ```
248
+
249
+ ## Documentation
250
+
251
+ Full reference documentation can be [found here](http://rubydoc.info/gems/html-pipeline/frames).
252
+
253
+ ## Development
254
+
255
+ To see what has changed in recent versions, see the [CHANGELOG](https://github.com/jch/html-pipeline/blob/master/CHANGELOG.md).
256
+
257
+ ```sh
258
+ bundle
259
+ rake test
260
+ ```
261
+
262
+ ## Contributing
263
+
264
+ 1. [Fork it](https://help.github.com/articles/fork-a-repo)
265
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
266
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
267
+ 4. Push to the branch (`git push origin my-new-feature`)
268
+ 5. Create new [Pull Request](https://help.github.com/articles/using-pull-requests)
269
+
270
+ ## Contributors
271
+
272
+ Thanks to all of [these contributors](https://github.com/jch/html-pipeline/graphs/contributors).
273
+
274
+ Project is a member of the [OSS Manifesto](http://ossmanifesto.org/).
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << "test"
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ t.verbose = true
9
+ end
10
+
11
+ task :default => :test
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env ruby
2
+ require 'html/pipeline'
3
+
4
+ require 'optparse'
5
+
6
+ # Accept "help", too
7
+ ARGV.map!{|a| a == "help" ? "--help" : a }
8
+
9
+ OptionParser.new do |opts|
10
+ opts.banner = <<-HELP.gsub(/^ /, '')
11
+ Usage: html-pipeline [-h] [-f]
12
+ html-pipeline [FILTER [FILTER [...]]] < file.md
13
+ cat file.md | html-pipeline [FILTER [FILTER [...]]]
14
+ HELP
15
+
16
+ opts.separator "Options:"
17
+
18
+ opts.on("-f", "--filters", "List the available filters") do
19
+ filters = HTML::Pipeline.constants.grep(/\w+Filter$/).
20
+ map{|f| f.to_s.gsub(/Filter$/,'') }
21
+
22
+ # Text filter doesn't work, no call method
23
+ filters -= ["Text"]
24
+
25
+ abort <<-HELP.gsub(/^ /, '')
26
+ Available filters:
27
+ #{filters.join("\n ")}
28
+ HELP
29
+ end
30
+ end.parse!
31
+
32
+ # Default to a GitHub-ish pipeline
33
+ if ARGV.empty?
34
+
35
+ filters = [
36
+ HTML::Pipeline::MarkdownFilter,
37
+ HTML::Pipeline::SanitizationFilter,
38
+ HTML::Pipeline::ImageMaxWidthFilter,
39
+ HTML::Pipeline::EmojiFilter,
40
+ HTML::Pipeline::AutolinkFilter,
41
+ HTML::Pipeline::TableOfContentsFilter,
42
+ ]
43
+
44
+ # Add syntax highlighting if linguist is present
45
+ begin
46
+ require 'linguist'
47
+ filters << HTML::Pipeline::SyntaxHighlightFilter
48
+ rescue LoadError
49
+ end
50
+
51
+ else
52
+
53
+ def filter_named(name)
54
+ case name
55
+ when "Text"
56
+ raise NameError # Text filter doesn't work, no call method
57
+ when "Textile"
58
+ require "RedCloth" # Textile filter doesn't require RedCloth
59
+ end
60
+
61
+ HTML::Pipeline.const_get("#{name}Filter")
62
+ rescue NameError => e
63
+ abort "Unknown filter '#{name}'. List filters with the -f option."
64
+ end
65
+
66
+ filters = []
67
+ until ARGV.empty?
68
+ name = ARGV.shift
69
+ filters << filter_named(name)
70
+ end
71
+
72
+ end
73
+
74
+ context = {
75
+ :asset_root => "/assets",
76
+ :base_url => "/",
77
+ :gfm => true
78
+ }
79
+
80
+ puts HTML::Pipeline.new(filters, context).call(ARGF.read)[:output]