geothird-html-pipeline 0.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/.travis.yml +13 -0
  4. data/CHANGELOG.md +43 -0
  5. data/Gemfile +9 -0
  6. data/LICENSE +22 -0
  7. data/README.md +274 -0
  8. data/Rakefile +11 -0
  9. data/bin/html-pipeline +80 -0
  10. data/geothird-html-pipeline.gemspec +27 -0
  11. data/lib/html/pipeline.rb +198 -0
  12. data/lib/html/pipeline/@mention_filter.rb +121 -0
  13. data/lib/html/pipeline/absolute_source_filter.rb +48 -0
  14. data/lib/html/pipeline/autolink_filter.rb +22 -0
  15. data/lib/html/pipeline/body_content.rb +42 -0
  16. data/lib/html/pipeline/camo_filter.rb +70 -0
  17. data/lib/html/pipeline/email_reply_filter.rb +56 -0
  18. data/lib/html/pipeline/emoji_filter.rb +54 -0
  19. data/lib/html/pipeline/filter.rb +178 -0
  20. data/lib/html/pipeline/https_filter.rb +13 -0
  21. data/lib/html/pipeline/image_max_width_filter.rb +37 -0
  22. data/lib/html/pipeline/markdown_filter.rb +29 -0
  23. data/lib/html/pipeline/plain_text_input_filter.rb +11 -0
  24. data/lib/html/pipeline/sanitization_filter.rb +105 -0
  25. data/lib/html/pipeline/syntax_highlight_filter.rb +33 -0
  26. data/lib/html/pipeline/text_filter.rb +14 -0
  27. data/lib/html/pipeline/textile_filter.rb +21 -0
  28. data/lib/html/pipeline/toc_filter.rb +28 -0
  29. data/lib/html/pipeline/version.rb +5 -0
  30. data/test/helpers/mocked_instrumentation_service.rb +17 -0
  31. data/test/html/pipeline/absolute_source_filter_test.rb +56 -0
  32. data/test/html/pipeline/autolink_filter_test.rb +22 -0
  33. data/test/html/pipeline/camo_filter_test.rb +47 -0
  34. data/test/html/pipeline/emoji_filter_test.rb +18 -0
  35. data/test/html/pipeline/image_max_width_filter_test.rb +50 -0
  36. data/test/html/pipeline/markdown_filter_test.rb +101 -0
  37. data/test/html/pipeline/mention_filter_test.rb +156 -0
  38. data/test/html/pipeline/plain_text_input_filter_test.rb +22 -0
  39. data/test/html/pipeline/sanitization_filter_test.rb +47 -0
  40. data/test/html/pipeline/toc_filter_test.rb +47 -0
  41. data/test/html/pipeline_test.rb +74 -0
  42. data/test/test_helper.rb +38 -0
  43. metadata +213 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5345ec14f85df8e82bc3aca081c65e4b94aa7878
4
+ data.tar.gz: d6bdcd46d8baad4c6bda04d5f1ecedaedde72aa0
5
+ SHA512:
6
+ metadata.gz: f5d4cf9bc9dca29a4330cc8000889f64e9ea1fffb0fef0fec01f50d20f8870f60961f0e1bb997d210f71c6a4619cc915b993de6689e2b9ab4741192ebfaf9f41
7
+ data.tar.gz: 6688675d57544c1004330867842bd3062970c74322e5fe4800bb37f30466c83d5d06588c859418503520dfc18b3ab0f8146e9d3904dce4a4d7b6a65c8c04dbf0
@@ -0,0 +1,19 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ exec/*
19
+ vendor/gems
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+
3
+ before_install:
4
+ - sudo apt-get update -qq
5
+ - sudo apt-get install -qq libicu-dev
6
+
7
+ script: "bundle exec rake"
8
+
9
+ rvm:
10
+ - 1.8.7
11
+ - 1.9.2
12
+ - 1.9.3
13
+ - ree
@@ -0,0 +1,43 @@
1
+ # CHANGELOG
2
+
3
+ ## 0.0.12
4
+
5
+ * add additional payload information for instrumentation mtodd #46
6
+ * generate and link to gem docs in README
7
+
8
+ ## 0.0.11
9
+
10
+ * add instrumentation support. readme cleanup mtodd #45
11
+
12
+ ## 0.0.10
13
+
14
+ * add bin/html-pipeline util indirect #44
15
+ * add result[:mentioned_usernames] for MentionFilter fachen #42
16
+
17
+ ## 0.0.9
18
+
19
+ * bump escape_utils ~> 0.3, github-linguist ~> 2.6.2 brianmario #41
20
+ * remove nokogiri monkey patch for ruby >= 1.9 defunkt #40
21
+
22
+ ## 0.0.8
23
+
24
+ * raise LoadError instead of printing to stderr if linguist is missing. gjtorikian #36
25
+
26
+ ## 0.0.7
27
+
28
+ * optionally require github-linguist chrislloyd #33
29
+
30
+ ## 0.0.6
31
+
32
+ * don't mutate markdown strings: jakedouglas #32
33
+
34
+ ## 0.0.5
35
+
36
+ * fix li xss vulnerability in sanitization filter: vmg #31
37
+ * gemspec cleanup: nbibler #23, jbarnette #24
38
+ * doc updates: jch #16, pborreli #17, wickedshimmy #18, benubois #19, blackerby #21
39
+ * loosen gemoji dependency: josh #15
40
+
41
+ ## 0.0.4
42
+
43
+ * initial public release
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in html-pipeline.gemspec
4
+ gemspec
5
+
6
+ group :development do
7
+ gem 'bundler'
8
+ gem 'rake'
9
+ end
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 GitHub Inc. and Jerry Cheung
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,274 @@
1
+ # HTML::Pipeline [![Build Status](https://secure.travis-ci.org/jch/html-pipeline.png)](http://travis-ci.org/jch/html-pipeline)
2
+
3
+ GitHub HTML processing filters and utilities. This module includes a small
4
+ framework for defining DOM based content filters and applying them to user
5
+ provided content. Read an introduction about this project in
6
+ [this blog post](https://github.com/blog/1311-html-pipeline-chainable-content-filters).
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ ```ruby
13
+ gem 'html-pipeline'
14
+ ```
15
+
16
+ And then execute:
17
+
18
+ ```sh
19
+ $ bundle
20
+ ```
21
+
22
+ Or install it yourself as:
23
+
24
+ ```sh
25
+ $ gem install html-pipeline
26
+ ```
27
+
28
+ ## Usage
29
+
30
+ This library provides a handful of chainable HTML filters to transform user
31
+ content into markup. A filter takes an HTML string or
32
+ `Nokogiri::HTML::DocumentFragment`, optionally manipulates it, and then
33
+ outputs the result.
34
+
35
+ For example, to transform Markdown source into HTML:
36
+
37
+ ```ruby
38
+ require 'html/pipeline'
39
+
40
+ filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!")
41
+ filter.call
42
+ ```
43
+
44
+ Filters can be combined into a pipeline which causes each filter to hand its
45
+ output to the next filter's input. So if you wanted to have content be
46
+ filtered through Markdown and be syntax highlighted, you can create the
47
+ following pipeline:
48
+
49
+ ```ruby
50
+ pipeline = HTML::Pipeline.new [
51
+ HTML::Pipeline::MarkdownFilter,
52
+ HTML::Pipeline::SyntaxHighlightFilter
53
+ ]
54
+ result = pipeline.call <<-CODE
55
+ This is *great*:
56
+
57
+ some_code(:first)
58
+
59
+ CODE
60
+ result[:output].to_s
61
+ ```
62
+
63
+ Prints:
64
+
65
+ ```html
66
+ <p>This is <em>great</em>:</p>
67
+
68
+ <div class="highlight">
69
+ <pre><span class="n">some_code</span><span class="p">(</span><span class="ss">:first</span><span class="p">)</span>
70
+ </pre>
71
+ </div>
72
+ ```
73
+
74
+ Some filters take an optional **context** and/or **result** hash. These are
75
+ used to pass around arguments and metadata between filters in a pipeline. For
76
+ example, if you don't want to use GitHub formatted Markdown, you can
77
+ pass an option in the context hash:
78
+
79
+ ```ruby
80
+ filter = HTML::Pipeline::MarkdownFilter.new("Hi **world**!", :gfm => false)
81
+ filter.call
82
+ ```
83
+
84
+ ## Filters
85
+
86
+ * `MentionFilter` - replace `@user` mentions with links
87
+ * `AbsoluteSourceFilter` - replace relative image urls with fully qualified versions
88
+ * `AutolinkFilter` - auto-link URLs in HTML
89
+ * `CamoFilter` - replace http image urls with [camo-fied](https://github.com/atmos/camo) https versions
90
+ * `EmailReplyFilter` - util filter for working with emails
91
+ * `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
92
+ * `HttpsFilter` - HTML Filter for replacing http github urls with https versions.
93
+ * `ImageMaxWidthFilter` - link to full size image for large images
94
+ * `MarkdownFilter` - convert markdown to html
95
+ * `PlainTextInputFilter` - html escape text and wrap the result in a div
96
+ * `SanitizationFilter` - whitelist sanitize user markup
97
+ * `SyntaxHighlightFilter` - [code syntax highlighter](#syntax-highlighting)
98
+ * `TextileFilter` - convert textile to html
99
+ * `TableOfContentsFilter` - anchor headings with name attributes
100
+
101
+ ## Syntax highlighting
102
+
103
+ `SyntaxHighlightFilter` uses [github-linguist](https://github.com/github/linguist)
104
+ to detect and highlight languages. It isn't included as a dependency by default
105
+ because it's a large dependency and
106
+ [a hassle to build on heroku](https://github.com/jch/html-pipeline/issues/33).
107
+ To use the filter, add the following to your Gemfile:
108
+
109
+ ```ruby
110
+ gem 'github-linguist'
111
+ ```
112
+
113
+ ## Examples
114
+
115
+ We define different pipelines for different parts of our app. Here are a few
116
+ paraphrased snippets to get you started:
117
+
118
+ ```ruby
119
+ # The context hash is how you pass options between different filters.
120
+ # See individual filter source for explanation of options.
121
+ context = {
122
+ :asset_root => "http://your-domain.com/where/your/images/live/icons",
123
+ :base_url => "http://your-domain.com"
124
+ }
125
+
126
+ # Pipeline providing sanitization and image hijacking but no mention
127
+ # related features.
128
+ SimplePipeline = Pipeline.new [
129
+ SanitizationFilter,
130
+ TableOfContentsFilter, # add 'name' anchors to all headers
131
+ CamoFilter,
132
+ ImageMaxWidthFilter,
133
+ SyntaxHighlightFilter,
134
+ EmojiFilter,
135
+ AutolinkFilter
136
+ ], context
137
+
138
+ # Pipeline used for user provided content on the web
139
+ MarkdownPipeline = Pipeline.new [
140
+ MarkdownFilter,
141
+ SanitizationFilter,
142
+ CamoFilter,
143
+ ImageMaxWidthFilter,
144
+ HttpsFilter,
145
+ MentionFilter,
146
+ EmojiFilter,
147
+ SyntaxHighlightFilter
148
+ ], context.merge(:gfm => true) # enable github formatted markdown
149
+
150
+
151
+ # Define a pipeline based on another pipeline's filters
152
+ NonGFMMarkdownPipeline = Pipeline.new(MarkdownPipeline.filters,
153
+ context.merge(:gfm => false))
154
+
155
+ # Pipelines aren't limited to the web. You can use them for email
156
+ # processing also.
157
+ HtmlEmailPipeline = Pipeline.new [
158
+ ImageMaxWidthFilter
159
+ ], {}
160
+
161
+ # Just emoji.
162
+ EmojiPipeline = Pipeline.new [
163
+ HTMLInputFilter,
164
+ EmojiFilter
165
+ ], context
166
+ ```
167
+
168
+ ## Extending
169
+ To write a custom filter, you need a class that inherits from
170
+ `HTML::Pipeline::Filter` and defines a `call` method.
171
+
172
+ For example this filter adds a base url to images that are root relative:
173
+
174
+ ```ruby
175
+ require 'uri'
176
+
177
+ class RootRelativeFilter < HTML::Pipeline::Filter
178
+
179
+ def call
180
+ doc.search("img").each do |img|
181
+ next if img['src'].nil?
182
+ src = img['src'].strip
183
+ if src.start_with? '/'
184
+ img["src"] = URI.join(context[:base_url], src).to_s
185
+ end
186
+ end
187
+ doc
188
+ end
189
+
190
+ end
191
+ ```
192
+
193
+ Now this filter can be used in a pipeline:
194
+
195
+ ```ruby
196
+ Pipeline.new [ RootRelativeFilter ], { :base_url => 'http://somehost.com' }
197
+ ```
198
+
199
+ ## Instrumenting
200
+
201
+ Filters and Pipelines can be set up to be instrumented when called. The pipeline
202
+ must be set up with an
203
+ [ActiveSupport::Notifications](http://api.rubyonrails.org/classes/ActiveSupport/Notifications.html)
204
+ compatible service object and a name. New pipeline objects will default to the
205
+ `HTML::Pipeline.default_instrumentation_service` object.
206
+
207
+ ``` ruby
208
+ # the AS::Notifications-compatible service object
209
+ service = ActiveSupport::Notifications
210
+
211
+ # instrument a specific pipeline
212
+ pipeline = HTML::Pipeline.new [MarkdownFilter], context
213
+ pipeline.setup_instrumentation "MarkdownPipeline", service
214
+
215
+ # or set default instrumentation service for all new pipelines
216
+ HTML::Pipeline.default_instrumentation_service = service
217
+ pipeline = HTML::Pipeline.new [MarkdownFilter], context
218
+ pipeline.setup_instrumentation "MarkdownPipeline"
219
+ ```
220
+
221
+ Filters are instrumented when they are run through the pipeline. A
222
+ `call_filter.html_pipeline` event is published once the filter finishes. The
223
+ `payload` should include the `filter` name. Each filter will trigger its own
224
+ instrumentation call.
225
+
226
+ ``` ruby
227
+ service.subscribe "call_filter.html_pipeline" do |event, start, ending, transaction_id, payload|
228
+ payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation`
229
+ payload[:filter] #=> "MarkdownFilter"
230
+ payload[:context] #=> context Hash
231
+ payload[:result] #=> instance of result class
232
+ payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment
233
+ end
234
+ ```
235
+
236
+ The full pipeline is also instrumented:
237
+
238
+ ``` ruby
239
+ service.subscribe "call_pipeline.html_pipeline" do |event, start, ending, transaction_id, payload|
240
+ payload[:pipeline] #=> "MarkdownPipeline", set with `setup_instrumentation`
241
+ payload[:filters] #=> ["MarkdownFilter"]
242
+ payload[:doc] #=> HTML String or Nokogiri::DocumentFragment
243
+ payload[:context] #=> context Hash
244
+ payload[:result] #=> instance of result class
245
+ payload[:result][:output] #=> output HTML String or Nokogiri::DocumentFragment
246
+ end
247
+ ```
248
+
249
+ ## Documentation
250
+
251
+ Full reference documentation can be [found here](http://rubydoc.info/gems/html-pipeline/frames).
252
+
253
+ ## Development
254
+
255
+ To see what has changed in recent versions, see the [CHANGELOG](https://github.com/jch/html-pipeline/blob/master/CHANGELOG.md).
256
+
257
+ ```sh
258
+ bundle
259
+ rake test
260
+ ```
261
+
262
+ ## Contributing
263
+
264
+ 1. [Fork it](https://help.github.com/articles/fork-a-repo)
265
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
266
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
267
+ 4. Push to the branch (`git push origin my-new-feature`)
268
+ 5. Create new [Pull Request](https://help.github.com/articles/using-pull-requests)
269
+
270
+ ## Contributors
271
+
272
+ Thanks to all of [these contributors](https://github.com/jch/html-pipeline/graphs/contributors).
273
+
274
+ Project is a member of the [OSS Manifesto](http://ossmanifesto.org/).
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new do |t|
6
+ t.libs << "test"
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ t.verbose = true
9
+ end
10
+
11
+ task :default => :test
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env ruby
2
+ require 'html/pipeline'
3
+
4
+ require 'optparse'
5
+
6
+ # Accept "help", too
7
+ ARGV.map!{|a| a == "help" ? "--help" : a }
8
+
9
+ OptionParser.new do |opts|
10
+ opts.banner = <<-HELP.gsub(/^ /, '')
11
+ Usage: html-pipeline [-h] [-f]
12
+ html-pipeline [FILTER [FILTER [...]]] < file.md
13
+ cat file.md | html-pipeline [FILTER [FILTER [...]]]
14
+ HELP
15
+
16
+ opts.separator "Options:"
17
+
18
+ opts.on("-f", "--filters", "List the available filters") do
19
+ filters = HTML::Pipeline.constants.grep(/\w+Filter$/).
20
+ map{|f| f.to_s.gsub(/Filter$/,'') }
21
+
22
+ # Text filter doesn't work, no call method
23
+ filters -= ["Text"]
24
+
25
+ abort <<-HELP.gsub(/^ /, '')
26
+ Available filters:
27
+ #{filters.join("\n ")}
28
+ HELP
29
+ end
30
+ end.parse!
31
+
32
+ # Default to a GitHub-ish pipeline
33
+ if ARGV.empty?
34
+
35
+ filters = [
36
+ HTML::Pipeline::MarkdownFilter,
37
+ HTML::Pipeline::SanitizationFilter,
38
+ HTML::Pipeline::ImageMaxWidthFilter,
39
+ HTML::Pipeline::EmojiFilter,
40
+ HTML::Pipeline::AutolinkFilter,
41
+ HTML::Pipeline::TableOfContentsFilter,
42
+ ]
43
+
44
+ # Add syntax highlighting if linguist is present
45
+ begin
46
+ require 'linguist'
47
+ filters << HTML::Pipeline::SyntaxHighlightFilter
48
+ rescue LoadError
49
+ end
50
+
51
+ else
52
+
53
+ def filter_named(name)
54
+ case name
55
+ when "Text"
56
+ raise NameError # Text filter doesn't work, no call method
57
+ when "Textile"
58
+ require "RedCloth" # Textile filter doesn't require RedCloth
59
+ end
60
+
61
+ HTML::Pipeline.const_get("#{name}Filter")
62
+ rescue NameError => e
63
+ abort "Unknown filter '#{name}'. List filters with the -f option."
64
+ end
65
+
66
+ filters = []
67
+ until ARGV.empty?
68
+ name = ARGV.shift
69
+ filters << filter_named(name)
70
+ end
71
+
72
+ end
73
+
74
+ context = {
75
+ :asset_root => "/assets",
76
+ :base_url => "/",
77
+ :gfm => true
78
+ }
79
+
80
+ puts HTML::Pipeline.new(filters, context).call(ARGF.read)[:output]