html-pipeline 0.0.8 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -1
- data/CHANGELOG.md +10 -0
- data/README.md +2 -1
- data/bin/html-pipeline +80 -0
- data/html-pipeline.gemspec +2 -2
- data/lib/html/pipeline.rb +18 -15
- data/lib/html/pipeline/@mention_filter.rb +3 -0
- data/lib/html/pipeline/absolute_source_filter.rb +48 -0
- data/lib/html/pipeline/version.rb +1 -1
- data/test/html/pipeline/absolute_source_filter_test.rb +56 -0
- data/test/html/pipeline/mention_filter_test.rb +1 -3
- metadata +10 -6
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## 0.0.10
|
4
|
+
|
5
|
+
* add bin/html-pipeline util indirect #44
|
6
|
+
* add result[:mentioned_usernames] for MentionFilter fachen #42
|
7
|
+
|
8
|
+
## 0.0.9
|
9
|
+
|
10
|
+
* bump escape_utils ~> 0.3, github-linguist ~> 2.6.2 brianmario #41
|
11
|
+
* remove nokogiri monkey patch for ruby >= 1.9 defunkt #40
|
12
|
+
|
3
13
|
## 0.0.8
|
4
14
|
|
5
15
|
* raise LoadError instead of printing to stderr if linguist is missing. gjtorikian #36
|
data/README.md
CHANGED
@@ -85,7 +85,8 @@ filter.call
|
|
85
85
|
## Filters
|
86
86
|
|
87
87
|
* `MentionFilter` - replace `@user` mentions with links
|
88
|
-
* `
|
88
|
+
* `AbsoluteSourceFilter` - replace relative image urls with fully qualified versions
|
89
|
+
* `AutolinkFilter` - auto_linking urls in HTML
|
89
90
|
* `CamoFilter` - replace http image urls with [camo-fied](https://github.com/atmos/camo) https versions
|
90
91
|
* `EmailReplyFilter` - util filter for working with emails
|
91
92
|
* `EmojiFilter` - everyone loves [emoji](http://www.emoji-cheat-sheet.com/)!
|
data/bin/html-pipeline
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'html/pipeline'
|
3
|
+
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
# Command-line front end for HTML::Pipeline.
#
# Reads a document from standard input (or from files, via ARGF), runs it
# through the filters named on the command line, and prints the resulting
# HTML. With no filter names a default GitHub-ish pipeline is used.

# Accept "help", too
ARGV.map! { |arg| arg == "help" ? "--help" : arg }

OptionParser.new do |opts|
  opts.banner = <<-HELP.gsub(/^    /, '')
    Usage: html-pipeline [-h] [-f]
           html-pipeline [FILTER [FILTER [...]]] < file.md
           cat file.md | html-pipeline [FILTER [FILTER [...]]]
  HELP

  opts.separator "Options:"

  opts.on("-f", "--filters", "List the available filters") do
    # Every HTML::Pipeline constant ending in "Filter", with the suffix
    # removed so the names match what the command line accepts.
    filters = HTML::Pipeline.constants.grep(/\w+Filter$/).
      map { |f| f.to_s.gsub(/Filter$/, '') }

    # Text filter doesn't work, no call method
    filters -= ["Text"]

    abort <<-HELP.gsub(/^      /, '')
      Available filters:
        #{filters.join("\n        ")}
    HELP
  end
end.parse!

# Default to a GitHub-ish pipeline
if ARGV.empty?

  filters = [
    HTML::Pipeline::MarkdownFilter,
    HTML::Pipeline::SanitizationFilter,
    HTML::Pipeline::ImageMaxWidthFilter,
    HTML::Pipeline::EmojiFilter,
    HTML::Pipeline::AutolinkFilter,
    HTML::Pipeline::TableOfContentsFilter,
  ]

  # Add syntax highlighting if linguist is present
  begin
    require 'linguist'
    filters << HTML::Pipeline::SyntaxHighlightFilter
  rescue LoadError
    # linguist is optional; without it the default pipeline simply skips
    # syntax highlighting.
  end

else

  # Resolve a filter name given on the command line (e.g. "Markdown") to the
  # matching HTML::Pipeline::<Name>Filter class.
  #
  # name - String filter name without the "Filter" suffix.
  #
  # Returns the filter class. Aborts the program with a one-line message when
  # the name is unknown or when a library the filter needs cannot be loaded.
  def filter_named(name)
    case name
    when "Text"
      raise NameError # Text filter doesn't work, no call method
    when "Textile"
      require "RedCloth" # Textile filter doesn't require RedCloth
    end

    HTML::Pipeline.const_get("#{name}Filter")
  rescue NameError
    abort "Unknown filter '#{name}'. List filters with the -f option."
  rescue LoadError => e
    # A missing optional gem (RedCloth, linguist, ...) should produce a usable
    # message rather than a backtrace.
    abort "Unable to load filter '#{name}': #{e.message}"
  end

  # Consume ARGV while resolving the names: ARGF only falls back to standard
  # input once ARGV is empty, otherwise it would try to open the filter names
  # as files.
  filters = []
  until ARGV.empty?
    name = ARGV.shift
    filters << filter_named(name)
  end

end

context = {
  :asset_root => "/assets",
  :base_url   => "/",
  :gfm        => true
}

puts HTML::Pipeline.new(filters, context).call(ARGF.read)[:output]
|
data/html-pipeline.gemspec
CHANGED
@@ -20,8 +20,8 @@ Gem::Specification.new do |gem|
|
|
20
20
|
gem.add_dependency "github-markdown", "~> 0.5"
|
21
21
|
gem.add_dependency "sanitize", "~> 2.0"
|
22
22
|
gem.add_dependency "rinku", "~> 1.7"
|
23
|
-
gem.add_dependency "escape_utils", "~> 0.
|
23
|
+
gem.add_dependency "escape_utils", "~> 0.3"
|
24
24
|
gem.add_dependency "activesupport", ">= 2"
|
25
25
|
|
26
|
-
gem.add_development_dependency "github-linguist", "~> 2.
|
26
|
+
gem.add_development_dependency "github-linguist", "~> 2.6.2"
|
27
27
|
end
|
data/lib/html/pipeline.rb
CHANGED
@@ -27,6 +27,7 @@ module HTML
|
|
27
27
|
autoload :VERSION, 'html/pipeline/version'
|
28
28
|
autoload :Pipeline, 'html/pipeline/pipeline'
|
29
29
|
autoload :Filter, 'html/pipeline/filter'
|
30
|
+
autoload :AbsoluteSourceFilter, 'html/pipeline/absolute_source_filter'
|
30
31
|
autoload :BodyContent, 'html/pipeline/body_content'
|
31
32
|
autoload :AutolinkFilter, 'html/pipeline/autolink_filter'
|
32
33
|
autoload :CamoFilter, 'html/pipeline/camo_filter'
|
@@ -108,23 +109,25 @@ module HTML
|
|
108
109
|
end
|
109
110
|
end
|
110
111
|
|
111
|
-
# XXX nokogiri monkey patches
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
replacement
|
112
|
+
# XXX nokogiri monkey patches for 1.8
|
113
|
+
if not ''.respond_to?(:force_encoding)
|
114
|
+
class Nokogiri::XML::Node
|
115
|
+
# Work around an issue with utf-8 encoded data being erroneously converted to
|
116
|
+
# ... some other shit when replacing text nodes. See 'utf-8 output 2' in
|
117
|
+
# user_content_test.rb for details.
|
118
|
+
def replace_with_encoding_fix(replacement)
|
119
|
+
if replacement.respond_to?(:to_str)
|
120
|
+
replacement = document.fragment("<div>#{replacement}</div>").children.first.children
|
121
|
+
end
|
122
|
+
replace_without_encoding_fix(replacement)
|
119
123
|
end
|
120
|
-
replace_without_encoding_fix(replacement)
|
121
|
-
end
|
122
124
|
|
123
|
-
|
124
|
-
|
125
|
+
alias_method :replace_without_encoding_fix, :replace
|
126
|
+
alias_method :replace, :replace_with_encoding_fix
|
125
127
|
|
126
|
-
|
127
|
-
|
128
|
-
|
128
|
+
def swap(replacement)
|
129
|
+
replace(replacement)
|
130
|
+
self
|
131
|
+
end
|
129
132
|
end
|
130
133
|
end
|
@@ -60,6 +60,8 @@ module HTML
|
|
60
60
|
IGNORE_PARENTS = %w(pre code a).to_set
|
61
61
|
|
62
62
|
def call
|
63
|
+
result[:mentioned_usernames] ||= []
|
64
|
+
|
63
65
|
doc.search('text()').each do |node|
|
64
66
|
content = node.to_html
|
65
67
|
next if !content.include?('@')
|
@@ -108,6 +110,7 @@ module HTML
|
|
108
110
|
end
|
109
111
|
|
110
112
|
def link_to_mentioned_user(login)
|
113
|
+
result[:mentioned_usernames] |= [login]
|
111
114
|
url = File.join(base_url, login)
|
112
115
|
"<a href='#{url}' class='user-mention'>" +
|
113
116
|
"@#{login}" +
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module HTML
  class Pipeline

    # HTML Filter for replacing relative and root relative image URLs with
    # fully qualified URLs
    #
    # This is useful if an image is root relative but should really be going
    # through a cdn, or if the content for the page assumes the host is known
    # i.e. scraped webpages and some RSS feeds.
    #
    # Context options:
    #   :image_base_url - Base URL for image host for root relative src.
    #   :image_subpage_url - For relative src.
    #
    # This filter does not write additional information to the context.
    # This filter would need to be run before CamoFilter.
    class AbsoluteSourceFilter < Filter
      # Matches a src that already carries an http or https scheme. Anchoring on
      # the full "scheme://" prefix keeps relative paths that merely begin with
      # the letters "http" (e.g. "httpdocs/logo.png") from being skipped.
      ABSOLUTE_URL = %r{\Ahttps?://}i

      # Rewrite the src of every <img> in the document that is not already an
      # absolute http(s) URL. Root relative sources ("/img.png") are resolved
      # against :image_base_url, all other relative sources against
      # :image_subpage_url.
      #
      # Images with a missing, empty or whitespace-only src are left untouched.
      #
      # Returns the document.
      # Raises RuntimeError if the context value needed for a src is missing.
      # URI.join may also raise (e.g. URI::InvalidURIError) for a src it cannot
      # parse; that error is not rescued here.
      def call
        doc.search("img").each do |element|
          # Strip before the emptiness check so a src of only whitespace is
          # skipped instead of being replaced by the bare base URL.
          src = element['src'].to_s.strip
          next if src.empty?
          next if src =~ ABSOLUTE_URL

          base = src.start_with?('/') ? image_base_url : image_subpage_url
          element["src"] = URI.join(base, src).to_s
        end
        doc
      end

      # Private: the base url you want to use
      def image_base_url
        context[:image_base_url] || raise("Missing context :image_base_url for #{self.class.name}")
      end

      # Private: the relative url you want to use
      def image_subpage_url
        context[:image_subpage_url] || raise("Missing context :image_subpage_url for #{self.class.name}")
      end

    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require "test_helper"
|
2
|
+
|
3
|
+
# Tests for HTML::Pipeline::AbsoluteSourceFilter: rewriting of root relative
# and relative image sources, pass-through of absolute ones, and the errors
# raised when the required context values are missing.
class HTML::Pipeline::AbsoluteSourceFilterTest < Test::Unit::TestCase
  AbsoluteSourceFilter = HTML::Pipeline::AbsoluteSourceFilter

  def setup
    @image_base_url    = 'http://assets.example.com'
    @image_subpage_url = 'http://blog.example.com/a/post'
    @options = {
      :image_base_url    => @image_base_url,
      :image_subpage_url => @image_subpage_url
    }
  end

  def test_rewrites_root_relative_urls
    orig = %(<p><img src="/img.png"></p>)
    assert_equal "<p><img src=\"#{@image_base_url}/img.png\"></p>",
      AbsoluteSourceFilter.call(orig, @options).to_s
  end

  # Named distinctly from the root relative test above: two methods with the
  # same name would leave only the later definition, so one case would never
  # run.
  def test_rewrites_relative_urls
    orig = %(<p><img src="post/img.png"></p>)
    assert_equal "<p><img src=\"#{@image_subpage_url}/img.png\"></p>",
      AbsoluteSourceFilter.call(orig, @options).to_s
  end

  def test_does_not_rewrite_absolute_urls
    orig = %(<p><img src="http://other.example.com/img.png"></p>)
    result = AbsoluteSourceFilter.call(orig, @options).to_s
    # Interpolate the configured URLs; a bare /@image_base_url/ literal would
    # only look for the text "@image_base_url" and could never fail.
    assert_no_match(/#{Regexp.escape(@image_base_url)}/, result)
    assert_no_match(/#{Regexp.escape(@image_subpage_url)}/, result)
  end

  def test_fails_when_context_is_missing
    assert_raise RuntimeError do
      AbsoluteSourceFilter.call("<img src=\"img.png\">", {})
    end
    assert_raise RuntimeError do
      AbsoluteSourceFilter.call("<img src=\"/img.png\">", {})
    end
  end

  def test_tells_you_where_context_is_required
    exception = assert_raise(RuntimeError) {
      AbsoluteSourceFilter.call("<img src=\"img.png\">", {})
    }
    assert_match 'HTML::Pipeline::AbsoluteSourceFilter', exception.message

    exception = assert_raise(RuntimeError) {
      AbsoluteSourceFilter.call("<img src=\"/img.png\">", {})
    }
    assert_match 'HTML::Pipeline::AbsoluteSourceFilter', exception.message
  end

end
|
@@ -76,9 +76,7 @@ class HTML::Pipeline::MentionFilterTest < Test::Unit::TestCase
|
|
76
76
|
def mentioned_usernames
|
77
77
|
result = {}
|
78
78
|
MarkdownPipeline.call(@body, {}, result)
|
79
|
-
|
80
|
-
users = html.scan(/user-mention">@(.+?)</)
|
81
|
-
users ? users.flatten.uniq : []
|
79
|
+
result[:mentioned_usernames]
|
82
80
|
end
|
83
81
|
|
84
82
|
def test_matches_usernames_in_body
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html-pipeline
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.10
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2013-
|
13
|
+
date: 2013-03-21 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: gemoji
|
@@ -99,7 +99,7 @@ dependencies:
|
|
99
99
|
requirements:
|
100
100
|
- - ~>
|
101
101
|
- !ruby/object:Gem::Version
|
102
|
-
version: '0.
|
102
|
+
version: '0.3'
|
103
103
|
type: :runtime
|
104
104
|
prerelease: false
|
105
105
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -107,7 +107,7 @@ dependencies:
|
|
107
107
|
requirements:
|
108
108
|
- - ~>
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '0.
|
110
|
+
version: '0.3'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: activesupport
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -131,7 +131,7 @@ dependencies:
|
|
131
131
|
requirements:
|
132
132
|
- - ~>
|
133
133
|
- !ruby/object:Gem::Version
|
134
|
-
version:
|
134
|
+
version: 2.6.2
|
135
135
|
type: :development
|
136
136
|
prerelease: false
|
137
137
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -139,7 +139,7 @@ dependencies:
|
|
139
139
|
requirements:
|
140
140
|
- - ~>
|
141
141
|
- !ruby/object:Gem::Version
|
142
|
-
version:
|
142
|
+
version: 2.6.2
|
143
143
|
description: GitHub HTML processing filters and utilities
|
144
144
|
email:
|
145
145
|
- ryan@github.com
|
@@ -155,9 +155,11 @@ files:
|
|
155
155
|
- LICENSE
|
156
156
|
- README.md
|
157
157
|
- Rakefile
|
158
|
+
- bin/html-pipeline
|
158
159
|
- html-pipeline.gemspec
|
159
160
|
- lib/html/pipeline.rb
|
160
161
|
- lib/html/pipeline/@mention_filter.rb
|
162
|
+
- lib/html/pipeline/absolute_source_filter.rb
|
161
163
|
- lib/html/pipeline/autolink_filter.rb
|
162
164
|
- lib/html/pipeline/body_content.rb
|
163
165
|
- lib/html/pipeline/camo_filter.rb
|
@@ -174,6 +176,7 @@ files:
|
|
174
176
|
- lib/html/pipeline/textile_filter.rb
|
175
177
|
- lib/html/pipeline/toc_filter.rb
|
176
178
|
- lib/html/pipeline/version.rb
|
179
|
+
- test/html/pipeline/absolute_source_filter_test.rb
|
177
180
|
- test/html/pipeline/autolink_filter_test.rb
|
178
181
|
- test/html/pipeline/camo_filter_test.rb
|
179
182
|
- test/html/pipeline/emoji_filter_test.rb
|
@@ -210,6 +213,7 @@ signing_key:
|
|
210
213
|
specification_version: 3
|
211
214
|
summary: Helpers for processing content through a chain of filters
|
212
215
|
test_files:
|
216
|
+
- test/html/pipeline/absolute_source_filter_test.rb
|
213
217
|
- test/html/pipeline/autolink_filter_test.rb
|
214
218
|
- test/html/pipeline/camo_filter_test.rb
|
215
219
|
- test/html/pipeline/emoji_filter_test.rb
|