sanitizer 0.1.8 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/sanitizer/htmlentries.rb +1 -1
- data/lib/sanitizer/sanitizer.rb +64 -42
- data/lib/sanitizer/version.rb +1 -1
- data/spec/sanitizer_spec.rb +6 -0
- metadata +5 -5
data/lib/sanitizer/sanitizer.rb
CHANGED
@@ -5,57 +5,89 @@ module Sanitizer
|
|
5
5
|
|
6
6
|
# All self.methods
|
7
7
|
class << self
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
|
9
|
+
# Sanitize to clean text
|
10
|
+
def sanitize!(text)
|
11
|
+
strip_tags!(text)
|
12
|
+
clean_spaces!(text)
|
13
|
+
text.replace html_encode(text)
|
12
14
|
text
|
13
15
|
end
|
16
|
+
|
17
|
+
def sanitize(text)
|
18
|
+
sanitize! text.dup
|
19
|
+
end
|
14
20
|
|
21
|
+
# Clean redundant spaces
|
22
|
+
def clean_spaces!(text)
|
23
|
+
text.gsub!(/\s+/, " ")
|
24
|
+
text.strip!
|
25
|
+
text
|
26
|
+
end
|
27
|
+
|
15
28
|
def clean_spaces(text)
|
16
|
-
|
17
|
-
output.gsub!(/\s+/, " ")
|
18
|
-
output.strip!
|
19
|
-
output
|
29
|
+
clean_spaces! text.dup
|
20
30
|
end
|
21
31
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
32
|
+
# remove comments
|
33
|
+
def strip_comments!(text)
|
34
|
+
text.gsub!(/(\<\!\-\-\b*[^\-\-\>]*.*?\-\-\>)/ui, "")
|
35
|
+
text.gsub!(/(\<\s?\!--.*\s?--\>)/uim, "")
|
36
|
+
text
|
27
37
|
end
|
28
38
|
|
39
|
+
def strip_comments(text)
|
40
|
+
strip_comments! text.dup
|
41
|
+
end
|
42
|
+
|
29
43
|
# Remove all <script> and <style> tags
|
30
|
-
def strip_disallowed_tags(text)
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
output.gsub!(/(<style\s*.*>.*<\/style>)/uim, "")
|
44
|
+
def strip_disallowed_tags!(text)
|
45
|
+
text.gsub!(/(<script\s*.*>.*<\/script>)/uim, "")
|
46
|
+
text.gsub!(/(<script\s*.*\/?>)/uim, "")
|
47
|
+
text.gsub!(/(<link\s*.*\/?>)/uim, "")
|
48
|
+
text.gsub!(/(<style\s*.*>.*<\/style>)/uim, "")
|
36
49
|
|
37
50
|
# Stripping html entities too
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
51
|
+
text.gsub!(/(\<script\s*.*\>.*\<\/script\>)/uim, "")
|
52
|
+
text.gsub!(/(\<script\s*.*\/?\>)/uim, "")
|
53
|
+
text.gsub!(/(\<link\s*.*\/?\>)/uim, "")
|
54
|
+
text.gsub!(/(\<style\s*.*\>.*\<\/style\>)/uim, "")
|
55
|
+
text
|
43
56
|
end
|
57
|
+
|
58
|
+
def strip_disallowed_tags(text)
|
59
|
+
strip_disallowed_tags! text.dup
|
60
|
+
end
|
44
61
|
|
45
62
|
# Remove all tags from text
|
46
|
-
def strip_tags(text, *tags)
|
47
|
-
output = text.dup
|
63
|
+
def strip_tags!(text, *tags)
|
48
64
|
if tags.empty? # clear all tags by default
|
49
|
-
|
50
|
-
|
65
|
+
text.gsub!(/<\/?[^>]*>/uim, "")
|
66
|
+
text.gsub!(/\<\/?[^\>]*\>/uim, "")
|
51
67
|
else # clean only selected tags
|
52
68
|
strip = tags.map do |tag|
|
53
69
|
%Q{(#{tag})}
|
54
70
|
end.join('|')
|
55
|
-
|
56
|
-
|
71
|
+
text.gsub!(/<\/?(#{strip})[^>]*>/uim, "")
|
72
|
+
text.gsub!(/\<\/?(#{strip})[^\>]*\>/uim, "")
|
57
73
|
end
|
58
|
-
|
74
|
+
text
|
75
|
+
end
|
76
|
+
|
77
|
+
def strip_tags(text, *tags)
|
78
|
+
strip_tags! text.dup, *tags
|
79
|
+
end
|
80
|
+
|
81
|
+
# Alguns feeds retornam tags "escapadas" dentro do conteúdo (ex: &lt;br/&gt;)
|
82
|
+
# Este método deve ser utilizado após o stripping e sanitização, para não deixar que essas tags sejam exibidas como conteúdo
|
83
|
+
def entities_to_chars!(text)
|
84
|
+
text.gsub!(/\</uim, "<")
|
85
|
+
text.gsub!(/\>/uim, ">")
|
86
|
+
text
|
87
|
+
end
|
88
|
+
|
89
|
+
def entities_to_chars(text)
|
90
|
+
entities_to_chars! text.dup
|
59
91
|
end
|
60
92
|
|
61
93
|
# Convert invalid chars to HTML Entries
|
@@ -68,16 +100,6 @@ module Sanitizer
|
|
68
100
|
def html_decode(text)
|
69
101
|
text = text.to_s
|
70
102
|
@@htmle.decode(text, :named)
|
71
|
-
end
|
72
|
-
|
73
|
-
# Alguns feeds retornam tags "escapadas" dentro do conteúdo (ex: &lt;br/&gt;)
|
74
|
-
# Este método deve ser utilizado após o stripping e sanitização, para não deixar que essas tags sejam exibidas como conteúdo
|
75
|
-
def entities_to_chars(text)
|
76
|
-
output = text.dup
|
77
|
-
output.gsub!(/\</uim, "<")
|
78
|
-
output.gsub!(/\>/uim, ">")
|
79
|
-
output
|
80
|
-
end
|
81
|
-
|
103
|
+
end
|
82
104
|
end # self
|
83
105
|
end
|
data/lib/sanitizer/version.rb
CHANGED
data/spec/sanitizer_spec.rb
CHANGED
@@ -18,6 +18,12 @@ describe Sanitizer do
|
|
18
18
|
output.should == 'Eu & você como Vai'
|
19
19
|
end
|
20
20
|
|
21
|
+
it "should not break “ entities" do
|
22
|
+
html = "“ Testando"
|
23
|
+
output = Sanitizer.sanitize(html)
|
24
|
+
output.should == "“ Testando"
|
25
|
+
end
|
26
|
+
|
21
27
|
it "should clean spaces and tags" do
|
22
28
|
html = "<p>Oi <b>como</b>
|
23
29
|
Vai</p>"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
9
|
-
- 8
|
10
|
-
version: 0.1.8
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Marcelo Eden
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-06-29 00:00:00 -03:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|