zumobi 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/zumobi.rb +4 -3
- data/lib/zumobi/amazon_s3.rb +27 -0
- data/lib/zumobi/gzip_reader.rb +15 -0
- data/lib/zumobi/gzip_writer.rb +16 -0
- data/lib/zumobi/removing_sanitize.rb +154 -0
- data/lib/zumobi/version.rb +1 -1
- metadata +26 -8
data/lib/zumobi.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
1
|
require "zumobi/exception_handler"
|
2
2
|
require "zumobi/notifications_logger"
|
3
3
|
require "zumobi/notifier"
|
4
|
+
require "zumobi/amazon_s3"
|
5
|
+
require "zumobi/gzip_reader"
|
6
|
+
require "zumobi/gzip_writer"
|
7
|
+
require "zumobi/removing_sanitize"
|
4
8
|
require "zumobi/version"
|
5
9
|
|
6
10
|
module Zumobi
|
7
|
-
|
8
|
-
|
9
|
-
|
10
11
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Zumobi
  # Helper for uploading data to Amazon S3 through the AWS::S3 client classes.
  # Credentials, bucket and object path are all read from the :amazon_s3
  # SimpleConfig group.
  class S3
    # Opens the AWS::S3 connection using the configured credentials and
    # prints a confirmation line to stdout.
    #
    # Raises RuntimeError (via .config) when the configuration is incomplete.
    def self.establish_s3_connection
      settings = config
      AWS::S3::Base.establish_connection!(
        :access_key_id     => settings.access_key_id,
        :secret_access_key => settings.secret_access_key
      )
      puts "Connected to s3"
    end

    # Stores +data+ at the configured path inside the configured bucket.
    #
    # data    - the object body handed to AWS::S3::S3Object.store.
    # options - Hash passed through unchanged to AWS::S3::S3Object.store.
    #
    # Returns whatever AWS::S3::S3Object.store returns.
    # Raises RuntimeError (via .config) when the configuration is incomplete.
    def self.save_in_s3(data, options = {})
      establish_s3_connection
      settings = config
      puts "Sending file to http://#{settings.bucket}/#{settings.path}"
      AWS::S3::S3Object.store(settings.path, data, settings.bucket, options)
    end

    # Loads and validates the :amazon_s3 SimpleConfig group.
    #
    # NOTE: this method used to sit below a bare `private`, which does not
    # apply to `def self.` methods, so it has always been publicly callable.
    # Visibility is intentionally left unchanged for existing callers.
    #
    # Returns the config object when every required setting is present.
    # Raises RuntimeError when any of :access_key_id, :secret_access_key,
    # :bucket or :path is blank.
    def self.config
      settings = SimpleConfig.for(:amazon_s3)
      incomplete = settings.access_key_id.blank? ||
                   settings.secret_access_key.blank? ||
                   settings.bucket.blank? ||
                   settings.path.blank?
      if incomplete
        raise "You must include a simple config :amazon_s3 with :access_key_id, :secret_access_key, :bucket and :path for this application."
      end
      settings
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'stringio'
require 'zlib'
|
2
|
+
|
3
|
+
module Zumobi
  # In-memory gzip compression of strings.
  class GzipWriter
    # Compresses +string+ with gzip and returns the compressed bytes.
    #
    # Approach taken from
    # http://devblog.famundo.com/articles/2007/03/02/serving-compressed-content-from-amazons-s3
    #
    # string - the data to compress.
    #
    # Returns a String holding the complete gzip stream.
    def self.gzip(string)
      # Use an explicitly mutable buffer: a bare '' literal is frozen when
      # frozen string literals are enabled, and StringIO cannot write to it.
      buffer = StringIO.new(''.dup, 'w')
      # The block form closes the gzip stream (flushing the trailer) even if
      # the write raises.
      Zlib::GzipWriter.wrap(buffer) { |gz| gz.write(string) }
      buffer.string
    end
  end
end
|
@@ -0,0 +1,154 @@
|
|
1
|
+
# A wrapper that adds the ability to specify "removals" when sanitizing HTML.
# The underlying Sanitize library strips disallowed tags but keeps their child
# content; for tags such as script or object we want the tag removed together
# with everything inside it.
module Zumobi
  class RemovingSanitize
    # Ready-made configurations. Each is a hash handed to Sanitize.clean; the
    # extra :removals key lists selectors whose matching elements are deleted
    # with all of their contents before sanitizing.
    module Config
      PLAINTEXT = {}

      SPORTS = {
        # 'script' is not an allowed element; it is additionally listed in
        # :removals so that it is deleted together with all of its contents.
        :elements => %w[
          img a p b i strong em cite dl dt dd ul ol li small
          big strike s u br table tr td th
        ],
        :attributes => {
          'a'   => ['href'],
          'img' => ['src', 'width', 'height']
        },
        :protocols => { 'a' => {'href' => ['http']} },
        :removals => ['script']
      }

      RESTRICTED = {
        :elements => ['b', 'em', 'i', 'strong', 'u']
      }

      ZUMOBI = {
        :elements => %w[
          a b blockquote br caption cite code col
          colgroup dd dl dt em h1 h2 h3 h4 h5 h6
          i img li ol p pre q small strike strong
          sub sup table tbody td tfoot th thead tr u
          ul
        ],
        :attributes => {
          'a'          => ['href'],
          'blockquote' => ['cite'],
          'col'        => ['span', 'width'],
          'colgroup'   => ['span', 'width'],
          'img'        => ['src', 'width'],
          'ol'         => ['start', 'type'],
          'q'          => ['cite'],
          'td'         => ['abbr', 'axis', 'colspan', 'rowspan', 'width'],
          'th'         => ['abbr', 'axis', 'colspan', 'rowspan', 'scope', 'width'],
          # This hash used to contain a second, earlier 'table' entry
          # (['summary', 'width']) that was silently overridden by this one.
          # Only the effective value is kept.
          'table'      => ['width'],
          'ul'         => ['type']
        },
        :protocols => {
          'a'   => {'href' => ['http', 'https', 'mailto', :relative]},
          'img' => {'src'  => ['http', 'https', :relative]}
        },
        :removals => ['iframe', 'script', 'style']
      }

      # Same as ZUMOBI but keeps the elements used by embedded video players.
      # NOTE(review): 'embed' and 'iframe' src have no :protocols restriction
      # here - confirm that is acceptable for the feeds using this config.
      PRESERVE_YOUTUBE = {
        :elements => %w[
          a b blockquote br caption cite code col
          colgroup dd dl dt em h1 h2 h3 h4 h5 h6
          i img li ol p pre q small strike strong
          sub sup table tbody td tfoot th thead tr u
          ul object embed param iframe
        ],
        :attributes => {
          'a'          => ['href'],
          'blockquote' => ['cite'],
          'col'        => ['span', 'width'],
          'colgroup'   => ['span', 'width'],
          'img'        => ['src', 'width'],
          'ol'         => ['start', 'type'],
          'q'          => ['cite'],
          'td'         => ['abbr', 'axis', 'colspan', 'rowspan', 'width'],
          'th'         => ['abbr', 'axis', 'colspan', 'rowspan', 'scope', 'width'],
          'table'      => ['width'],
          'ul'         => ['type'],
          # Was keyed as 'obj', which matches no element; the allowed element
          # is 'object', so its width/height whitelist never took effect.
          'object'     => ['width', 'height'],
          'embed'      => ['src', 'type', 'allowscriptaccess', 'allowfullscreen', 'width', 'height'],
          'param'      => ['name', 'value'],
          'iframe'     => ['src']
        },
        :protocols => {
          'a'   => {'href' => ['http', 'https', 'mailto', :relative]},
          'img' => {'src'  => ['http', 'https', :relative]}
        },
        :removals => ['script', 'style']
      }

      REMOTE_HTML = {
        :elements => %w[
          a abbr b bdo blockquote br caption cite code col colgroup dd del dfn div dl
          dt em figcaption figure h1 h2 h3 h4 h5 h6 hgroup i img ins kbd li mark
          ol p pre q rp rt ruby s samp small span strike strong sub sup table tbody td
          tfoot th thead time tr u ul var wbr
        ],

        :attributes => {
          :all         => ['dir', 'lang', 'title', 'class', 'id'],
          'a'          => ['href'],
          'blockquote' => ['cite'],
          'col'        => ['span', 'width'],
          'colgroup'   => ['span', 'width'],
          'del'        => ['cite', 'datetime'],
          'img'        => ['align', 'alt', 'height', 'src', 'width'],
          'ins'        => ['cite', 'datetime'],
          'ol'         => ['start', 'reversed', 'type'],
          'q'          => ['cite'],
          'table'      => ['summary', 'width'],
          'td'         => ['abbr', 'axis', 'colspan', 'rowspan', 'width'],
          'th'         => ['abbr', 'axis', 'colspan', 'rowspan', 'scope', 'width'],
          'time'       => ['datetime', 'pubdate'],
          'ul'         => ['type']
        },

        :protocols => {
          'a'          => {'href' => ['ftp', 'http', 'https', 'mailto', :relative]},
          'blockquote' => {'cite' => ['http', 'https', :relative]},
          'del'        => {'cite' => ['http', 'https', :relative]},
          'img'        => {'src'  => ['http', 'https', :relative]},
          'ins'        => {'cite' => ['http', 'https', :relative]},
          'q'          => {'cite' => ['http', 'https', :relative]}
        },
        :removals => ['script', 'iframe', 'style']
      }
    end

    # Sanitizes +html+ according to +config+.
    #
    # html   - the markup to clean; it is never modified in place, so frozen
    #          strings are accepted.
    # config - a Sanitize configuration hash, optionally carrying a :removals
    #          array of selectors whose elements are dropped with their
    #          contents (defaults to Config::ZUMOBI).
    #
    # Returns +html+ itself when it is blank, the decoded text when decoding
    # leaves nothing but blank content, and otherwise the result of
    # Sanitize.clean.
    def self.clean(html, config = RemovingSanitize::Config::ZUMOBI)
      return html if html.blank?

      # Remove CDATA escaping: sanitize converts this to <[CDATA[ ... ]]> which is visible to user.
      text = html.gsub(/<!\[CDATA\[/, '').gsub(/\]\]>/, '')
      # In one feed Nokogiri eats too much HTML when parsing it due to the presence of a comment
      # sequence, so comments are stripped with a regular expression first.
      # NOTE(review): [^-]* means comments that contain a hyphen are not matched here.
      text = text.gsub(/<!--[^-]*-->/, "")
      # Decode HTML entities, then decode HTML that is itself escaped,
      # e.g. "&lt;div&gt;test&lt;/div&gt;".
      text = CGI::unescapeHTML(HTMLEntities.new.decode(text))
      return text if text.blank?

      removals = config[:removals]
      text = strip_removals(text, removals) unless removals.nil?
      Sanitize.clean(text, config)
    end

    # Deletes every element matching one of +removals+, including its children,
    # and returns the remaining markup.
    def self.strip_removals(html, removals)
      # Wrap in a div so plain text input parses too; only the div's children
      # are serialized so callers never see the wrapper element.
      doc = Nokogiri::HTML.fragment "<div>#{html}</div>"
      removals.each do |removal|
        doc.search(removal).each { |element| element.remove }
      end
      doc.children[0].children.map { |node| node.to_html }.join('')
    end
    private_class_method :strip_removals
  end
end
|
data/lib/zumobi/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: zumobi
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 12
|
10
|
+
version: 0.0.12
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alx Dark
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2013-
|
18
|
+
date: 2013-04-02 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: curb
|
@@ -60,9 +60,23 @@ dependencies:
|
|
60
60
|
type: :runtime
|
61
61
|
version_requirements: *id003
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
|
-
name:
|
63
|
+
name: htmlentities
|
64
64
|
prerelease: false
|
65
65
|
requirement: &id004 !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
hash: 3
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :runtime
|
75
|
+
version_requirements: *id004
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: airbrake
|
78
|
+
prerelease: false
|
79
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
66
80
|
none: false
|
67
81
|
requirements:
|
68
82
|
- - ~>
|
@@ -74,11 +88,11 @@ dependencies:
|
|
74
88
|
- 6
|
75
89
|
version: 3.1.6
|
76
90
|
type: :runtime
|
77
|
-
version_requirements: *
|
91
|
+
version_requirements: *id005
|
78
92
|
- !ruby/object:Gem::Dependency
|
79
93
|
name: urbanairship
|
80
94
|
prerelease: false
|
81
|
-
requirement: &
|
95
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
82
96
|
none: false
|
83
97
|
requirements:
|
84
98
|
- - ~>
|
@@ -90,7 +104,7 @@ dependencies:
|
|
90
104
|
- 0
|
91
105
|
version: 2.3.0
|
92
106
|
type: :runtime
|
93
|
-
version_requirements: *
|
107
|
+
version_requirements: *id006
|
94
108
|
description: Cross-app notification and exception handling support
|
95
109
|
email:
|
96
110
|
- alx.dark@zumobi.com
|
@@ -101,9 +115,13 @@ extensions: []
|
|
101
115
|
extra_rdoc_files: []
|
102
116
|
|
103
117
|
files:
|
118
|
+
- lib/zumobi/amazon_s3.rb
|
104
119
|
- lib/zumobi/exception_handler.rb
|
120
|
+
- lib/zumobi/gzip_reader.rb
|
121
|
+
- lib/zumobi/gzip_writer.rb
|
105
122
|
- lib/zumobi/notifications_logger.rb
|
106
123
|
- lib/zumobi/notifier.rb
|
124
|
+
- lib/zumobi/removing_sanitize.rb
|
107
125
|
- lib/zumobi/version.rb
|
108
126
|
- lib/zumobi.rb
|
109
127
|
homepage: http://www.zumobi.com/
|