zumobi 0.0.11 → 0.0.12
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/zumobi.rb +4 -3
- data/lib/zumobi/amazon_s3.rb +27 -0
- data/lib/zumobi/gzip_reader.rb +15 -0
- data/lib/zumobi/gzip_writer.rb +16 -0
- data/lib/zumobi/removing_sanitize.rb +154 -0
- data/lib/zumobi/version.rb +1 -1
- metadata +26 -8
data/lib/zumobi.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require "zumobi/exception_handler"
|
2
2
|
require "zumobi/notifications_logger"
|
3
3
|
require "zumobi/notifier"
|
4
|
+
require "zumobi/amazon_s3"
|
5
|
+
require "zumobi/gzip_reader"
|
6
|
+
require "zumobi/gzip_writer"
|
7
|
+
require "zumobi/removing_sanitize"
|
4
8
|
require "zumobi/version"
|
5
9
|
|
6
10
|
module Zumobi
|
7
|
-
|
8
|
-
|
9
|
-
|
10
11
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Zumobi
  # Uploads data to Amazon S3 through the AWS::S3 client, using the
  # credentials, bucket and path found in the :amazon_s3 SimpleConfig section.
  class S3
    # Opens the AWS::S3 connection with the configured credentials and prints
    # a confirmation line.
    #
    # Raises RuntimeError (from .config) when a required setting is blank.
    def self.establish_s3_connection
      config = Zumobi::S3.config
      AWS::S3::Base.establish_connection!(
        :access_key_id     => config.access_key_id,
        :secret_access_key => config.secret_access_key
      )
      puts "Connected to s3"
    end

    # Stores +data+ at the configured path in the configured bucket.
    #
    # data    - the object body handed to AWS::S3::S3Object.store.
    # options - Hash passed through unchanged as the store options.
    #
    # Returns whatever AWS::S3::S3Object.store returns.
    # Raises RuntimeError (from .config) when a required setting is blank.
    def self.save_in_s3(data, options = {})
      establish_s3_connection
      config = Zumobi::S3.config
      puts "Sending file to http://#{config.bucket}/#{config.path}"
      AWS::S3::S3Object.store(config.path, data, config.bucket, options)
    end

    # Loads the :amazon_s3 SimpleConfig section and verifies that every
    # setting the uploader needs is present.
    #
    # NOTE: this method has always been publicly callable. A bare `private`
    # keyword does not affect methods defined with `def self.`, and the other
    # class methods call it with an explicit receiver, so it is kept public.
    #
    # Returns the config object.
    # Raises RuntimeError when :access_key_id, :secret_access_key, :bucket or
    # :path is blank.
    def self.config
      config = SimpleConfig.for(:amazon_s3)
      # The secret key must be validated too; previously the access key id was
      # tested twice and a missing secret slipped through to the AWS call.
      if config.access_key_id.blank? || config.secret_access_key.blank? ||
         config.bucket.blank? || config.path.blank?
        raise "You must include a simple config :amazon_s3 with :access_key_id, :secret_access_key, :bucket and :path for this application."
      end
      config
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'zlib'
require 'stringio'

module Zumobi
  # In-memory gzip compression helper.
  class GzipWriter
    # Compresses +string+ into a gzip stream held entirely in memory.
    #
    # Technique taken from:
    # http://devblog.famundo.com/articles/2007/03/02/serving-compressed-content-from-amazons-s3
    #
    # string - the data to compress.
    #
    # Returns a String containing the gzip bytes. The buffer is created with
    # String.new so the result is tagged as binary rather than as text.
    def self.gzip(string)
      buffer = StringIO.new(String.new)
      # wrap closes the gzip stream (which writes the trailer) even when the
      # write raises, so the writer is never left open.
      Zlib::GzipWriter.wrap(buffer) { |gz| gz.write(string) }
      buffer.string
    end
  end
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
# A wrapper that adds the ability to specify "removals" when sanitizing HTML. The library we're using
# will remove tags, but it cannot be told to remove a tag together with all of its child content, as
# we would want for a script or object tag, for example.
module Zumobi
  class RemovingSanitize
    # Ready-made configurations. Each is a Sanitize-style config hash, optionally extended with a
    # :removals list naming the elements that are deleted together with their contents before the
    # remaining markup is handed to Sanitize.
    module Config
      # No elements allowed and nothing removed up front.
      PLAINTEXT = {}

      SPORTS = {
        # 'script' is not an allowed element; it is listed under :removals so that the tag is
        # deleted together with all of its contents.
        :elements => ['img', 'a', 'p', 'b', 'i', 'strong', 'em', 'cite', 'dl', 'dt', 'dd', 'ul', 'ol', 'li', 'small',
                      'big', 'strike', 's', 'u', 'br', 'table', 'tr', 'td', 'th'],
        :attributes => {
          'a'   => ['href'],
          'img' => ['src', 'width', 'height']
        },
        :protocols => { 'a' => {'href' => ['http']} },
        :removals => ['script']
      }

      RESTRICTED = {
        :elements => ['b', 'em', 'i', 'strong', 'u']
      }

      ZUMOBI = {
        :elements => [
          'a', 'b', 'blockquote', 'br', 'caption', 'cite', 'code', 'col',
          'colgroup', 'dd', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
          'i', 'img', 'li', 'ol', 'p', 'pre', 'q', 'small', 'strike', 'strong',
          'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'u',
          'ul'],
        :attributes => {
          'a'          => ['href'],
          'blockquote' => ['cite'],
          'col'        => ['span', 'width'],
          'colgroup'   => ['span', 'width'],
          'img'        => ['src', 'width'],
          'ol'         => ['start', 'type'],
          'q'          => ['cite'],
          'td'         => ['abbr', 'axis', 'colspan', 'rowspan', 'width'],
          'th'         => ['abbr', 'axis', 'colspan', 'rowspan', 'scope', 'width'],
          # 'table' used to be listed twice (['summary', 'width'] and then ['width']). Ruby keeps
          # the last value for a duplicated literal key, so ['width'] has always been the rule in
          # effect; only that entry is kept.
          'table'      => ['width'],
          'ul'         => ['type']
        },
        :protocols => {
          'a'   => {'href' => ['http', 'https', 'mailto', :relative]},
          'img' => {'src' => ['http', 'https', :relative]}
        },
        :removals => ['iframe', 'script', 'style']
      }

      PRESERVE_YOUTUBE = {
        :elements => [
          'a', 'b', 'blockquote', 'br', 'caption', 'cite', 'code', 'col',
          'colgroup', 'dd', 'dl', 'dt', 'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
          'i', 'img', 'li', 'ol', 'p', 'pre', 'q', 'small', 'strike', 'strong',
          'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'u',
          'ul', 'object', 'embed', 'param', 'iframe'],
        :attributes => {
          'a'          => ['href'],
          'blockquote' => ['cite'],
          'col'        => ['span', 'width'],
          'colgroup'   => ['span', 'width'],
          'img'        => ['src', 'width'],
          'ol'         => ['start', 'type'],
          'q'          => ['cite'],
          'td'         => ['abbr', 'axis', 'colspan', 'rowspan', 'width'],
          'th'         => ['abbr', 'axis', 'colspan', 'rowspan', 'scope', 'width'],
          'table'      => ['width'],
          'ul'         => ['type'],
          # Keyed by the real element name. The previous key 'obj' matched no element in
          # :elements, so the width/height rule never applied to <object>.
          'object'     => ['width', 'height'],
          'embed'      => ['src', 'type', 'allowscriptaccess', 'allowfullscreen', 'width', 'height'],
          'param'      => ['name', 'value'],
          'iframe'     => ['src']
        },
        :protocols => {
          'a'   => {'href' => ['http', 'https', 'mailto', :relative]},
          'img' => {'src' => ['http', 'https', :relative]}
        },
        :removals => ['script', 'style']
      }

      REMOTE_HTML = {
        :elements => %w[
          a abbr b bdo blockquote br caption cite code col colgroup dd del dfn div dl
          dt em figcaption figure h1 h2 h3 h4 h5 h6 hgroup i img ins kbd li mark
          ol p pre q rp rt ruby s samp small span strike strong sub sup table tbody td
          tfoot th thead time tr u ul var wbr
        ],

        :attributes => {
          :all         => ['dir', 'lang', 'title', 'class', 'id'],
          'a'          => ['href'],
          'blockquote' => ['cite'],
          'col'        => ['span', 'width'],
          'colgroup'   => ['span', 'width'],
          'del'        => ['cite', 'datetime'],
          'img'        => ['align', 'alt', 'height', 'src', 'width'],
          'ins'        => ['cite', 'datetime'],
          'ol'         => ['start', 'reversed', 'type'],
          'q'          => ['cite'],
          'table'      => ['summary', 'width'],
          'td'         => ['abbr', 'axis', 'colspan', 'rowspan', 'width'],
          'th'         => ['abbr', 'axis', 'colspan', 'rowspan', 'scope', 'width'],
          'time'       => ['datetime', 'pubdate'],
          'ul'         => ['type']
        },

        :protocols => {
          'a'          => {'href' => ['ftp', 'http', 'https', 'mailto', :relative]},
          'blockquote' => {'cite' => ['http', 'https', :relative]},
          'del'        => {'cite' => ['http', 'https', :relative]},
          'img'        => {'src' => ['http', 'https', :relative]},
          'ins'        => {'cite' => ['http', 'https', :relative]},
          'q'          => {'cite' => ['http', 'https', :relative]}
        },
        :removals => ['script', 'iframe', 'style']
      }
    end

    # Sanitizes +html+ according to +config+.
    #
    # html   - the markup to clean. It is never modified: the method works on copies, so frozen
    #          strings and strings still referenced by the caller are safe to pass in.
    # config - one of the Config hashes (or a compatible hash). When it has a :removals list, every
    #          element matching an entry is deleted together with its contents before the rest is
    #          passed to Sanitize.clean.
    #
    # Returns the sanitized markup. When +html+ is blank it is returned untouched; when it becomes
    # blank after CDATA/comment stripping and entity decoding, that decoded value is returned.
    def self.clean(html, config = RemovingSanitize::Config::ZUMOBI)
      return html if html.blank?

      # Remove CDATA escaping: sanitize converts this to <[CDATA[ ... ]]> which is visible to the user.
      html = html.gsub(/<!\[CDATA\[/, '').gsub(/\]\]>/, '')
      # In one feed Nokogiri eats too much HTML when parsing it due to the presence of a comment
      # sequence, so comments are stripped with a regular expression first. Note that this pattern
      # only matches comments whose body contains no '-' character.
      html = html.gsub(/<!--[^-]*-->/, "")
      # Decode HTML entities.
      html = HTMLEntities.new.decode(html)
      # Decode HTML that is escaped, e.g. "&lt;div&gt;test&lt;/div&gt;".
      html = CGI::unescapeHTML(html)
      return html if html.blank?

      unless config[:removals].nil?
        doc = Nokogiri::HTML.fragment "<div>#{html}</div>"
        config[:removals].each do |removal|
          doc.search(removal).each(&:remove)
        end
        # Serialize the children of the wrapping div rather than the div itself. Otherwise plain
        # text input would come back wrapped in a div element (methods in EntryDecorator such as
        # text(true) and plaintext could then return content with that wrapper).
        html = doc.children[0].children.map { |node| node.to_html }.join('')
      end

      Sanitize.clean(html, config)
    end
  end
end
|
data/lib/zumobi/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zumobi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 11
|
10
|
-
version: 0.0.11
|
9
|
+
- 12
|
10
|
+
version: 0.0.12
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alx Dark
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-
|
18
|
+
date: 2013-04-02 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: curb
|
@@ -60,9 +60,23 @@ dependencies:
|
|
60
60
|
type: :runtime
|
61
61
|
version_requirements: *id003
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
|
-
name: airbrake
|
63
|
+
name: htmlentities
|
64
64
|
prerelease: false
|
65
65
|
requirement: &id004 !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :runtime
|
75
|
+
version_requirements: *id004
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: airbrake
|
78
|
+
prerelease: false
|
79
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
66
80
|
none: false
|
67
81
|
requirements:
|
68
82
|
- - ~>
|
@@ -74,11 +88,11 @@ dependencies:
|
|
74
88
|
- 6
|
75
89
|
version: 3.1.6
|
76
90
|
type: :runtime
|
77
|
-
version_requirements: *id004
|
91
|
+
version_requirements: *id005
|
78
92
|
- !ruby/object:Gem::Dependency
|
79
93
|
name: urbanairship
|
80
94
|
prerelease: false
|
81
|
-
requirement: &id005 !ruby/object:Gem::Requirement
|
95
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
82
96
|
none: false
|
83
97
|
requirements:
|
84
98
|
- - ~>
|
@@ -90,7 +104,7 @@ dependencies:
|
|
90
104
|
- 0
|
91
105
|
version: 2.3.0
|
92
106
|
type: :runtime
|
93
|
-
version_requirements: *id005
|
107
|
+
version_requirements: *id006
|
94
108
|
description: Cross-app notification and exception handling support
|
95
109
|
email:
|
96
110
|
- alx.dark@zumobi.com
|
@@ -101,9 +115,13 @@ extensions: []
|
|
101
115
|
extra_rdoc_files: []
|
102
116
|
|
103
117
|
files:
|
118
|
+
- lib/zumobi/amazon_s3.rb
|
104
119
|
- lib/zumobi/exception_handler.rb
|
120
|
+
- lib/zumobi/gzip_reader.rb
|
121
|
+
- lib/zumobi/gzip_writer.rb
|
105
122
|
- lib/zumobi/notifications_logger.rb
|
106
123
|
- lib/zumobi/notifier.rb
|
124
|
+
- lib/zumobi/removing_sanitize.rb
|
107
125
|
- lib/zumobi/version.rb
|
108
126
|
- lib/zumobi.rb
|
109
127
|
homepage: http://www.zumobi.com/
|