sanitizer 0.1.8 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/sanitizer/htmlentries.rb +1 -1
- data/lib/sanitizer/sanitizer.rb +64 -42
- data/lib/sanitizer/version.rb +1 -1
- data/spec/sanitizer_spec.rb +6 -0
- metadata +5 -5
data/lib/sanitizer/sanitizer.rb
CHANGED
@@ -5,57 +5,89 @@ module Sanitizer
|
|
5
5
|
|
6
6
|
# All self.methods
|
7
7
|
class << self
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
|
9
|
+
# Sanitize to clean text
|
10
|
+
def sanitize!(text)
|
11
|
+
strip_tags!(text)
|
12
|
+
clean_spaces!(text)
|
13
|
+
text.replace html_encode(text)
|
12
14
|
text
|
13
15
|
end
|
16
|
+
|
17
|
+
def sanitize(text)
|
18
|
+
sanitize! text.dup
|
19
|
+
end
|
14
20
|
|
21
|
+
# Clean redundant spaces
|
22
|
+
def clean_spaces!(text)
|
23
|
+
text.gsub!(/\s+/, " ")
|
24
|
+
text.strip!
|
25
|
+
text
|
26
|
+
end
|
27
|
+
|
15
28
|
def clean_spaces(text)
|
16
|
-
|
17
|
-
output.gsub!(/\s+/, " ")
|
18
|
-
output.strip!
|
19
|
-
output
|
29
|
+
clean_spaces! text.dup
|
20
30
|
end
|
21
31
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
32
|
+
# remove comments
|
33
|
+
def strip_comments!(text)
|
34
|
+
text.gsub!(/(\<\!\-\-\b*[^\-\-\>]*.*?\-\-\>)/ui, "")
|
35
|
+
text.gsub!(/(\<\s?\!--.*\s?--\>)/uim, "")
|
36
|
+
text
|
27
37
|
end
|
28
38
|
|
39
|
+
def strip_comments(text)
|
40
|
+
strip_comments! text.dup
|
41
|
+
end
|
42
|
+
|
29
43
|
# Remove all <script> and <style> tags
|
30
|
-
def strip_disallowed_tags(text)
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
output.gsub!(/(<style\s*.*>.*<\/style>)/uim, "")
|
44
|
+
def strip_disallowed_tags!(text)
|
45
|
+
text.gsub!(/(<script\s*.*>.*<\/script>)/uim, "")
|
46
|
+
text.gsub!(/(<script\s*.*\/?>)/uim, "")
|
47
|
+
text.gsub!(/(<link\s*.*\/?>)/uim, "")
|
48
|
+
text.gsub!(/(<style\s*.*>.*<\/style>)/uim, "")
|
36
49
|
|
37
50
|
# Stripping html entities too
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
51
|
+
text.gsub!(/(&lt;script\s*.*&gt;.*&lt;\/script&gt;)/uim, "")
|
52
|
+
text.gsub!(/(&lt;script\s*.*\/?&gt;)/uim, "")
|
53
|
+
text.gsub!(/(&lt;link\s*.*\/?&gt;)/uim, "")
|
54
|
+
text.gsub!(/(&lt;style\s*.*&gt;.*&lt;\/style&gt;)/uim, "")
|
55
|
+
text
|
43
56
|
end
|
57
|
+
|
58
|
+
def strip_disallowed_tags(text)
|
59
|
+
strip_disallowed_tags! text.dup
|
60
|
+
end
|
44
61
|
|
45
62
|
# Remove all tags from text
|
46
|
-
def strip_tags(text, *tags)
|
47
|
-
output = text.dup
|
63
|
+
def strip_tags!(text, *tags)
|
48
64
|
if tags.empty? # clear all tags by default
|
49
|
-
|
50
|
-
|
65
|
+
text.gsub!(/<\/?[^>]*>/uim, "")
|
66
|
+
text.gsub!(/&lt;\/?[^&gt;]*&gt;/uim, "")
|
51
67
|
else # clean only selected tags
|
52
68
|
strip = tags.map do |tag|
|
53
69
|
%Q{(#{tag})}
|
54
70
|
end.join('|')
|
55
|
-
|
56
|
-
|
71
|
+
text.gsub!(/<\/?(#{strip})[^>]*>/uim, "")
|
72
|
+
text.gsub!(/&lt;\/?(#{strip})[^&gt;]*&gt;/uim, "")
|
57
73
|
end
|
58
|
-
|
74
|
+
text
|
75
|
+
end
|
76
|
+
|
77
|
+
def strip_tags(text, *tags)
|
78
|
+
strip_tags! text.dup, *tags
|
79
|
+
end
|
80
|
+
|
81
|
+
# Alguns feeds retornam tags "escapadas" dentro do conteúdo (ex: &lt;br/&gt;)
|
82
|
+
# Este método deve ser utilizado após o stripping e sanitização, para não deixar que essas tags sejam exibidas como conteúdo
|
83
|
+
def entities_to_chars!(text)
|
84
|
+
text.gsub!(/&lt;/uim, "<")
|
85
|
+
text.gsub!(/&gt;/uim, ">")
|
86
|
+
text
|
87
|
+
end
|
88
|
+
|
89
|
+
def entities_to_chars(text)
|
90
|
+
entities_to_chars! text.dup
|
59
91
|
end
|
60
92
|
|
61
93
|
# Convert invalid chars to HTML Entries
|
@@ -68,16 +100,6 @@ module Sanitizer
|
|
68
100
|
def html_decode(text)
|
69
101
|
text = text.to_s
|
70
102
|
@@htmle.decode(text, :named)
|
71
|
-
end
|
72
|
-
|
73
|
-
# Alguns feeds retornam tags "escapadas" dentro do conteúdo (ex: &lt;br/&gt;)
|
74
|
-
# Este método deve ser utilizado após o stripping e sanitização, para não deixar que essas tags sejam exibidas como conteúdo
|
75
|
-
def entities_to_chars(text)
|
76
|
-
output = text.dup
|
77
|
-
output.gsub!(/&lt;/uim, "<")
|
78
|
-
output.gsub!(/&gt;/uim, ">")
|
79
|
-
output
|
80
|
-
end
|
81
|
-
|
103
|
+
end
|
82
104
|
end # self
|
83
105
|
end
|
data/lib/sanitizer/version.rb
CHANGED
data/spec/sanitizer_spec.rb
CHANGED
@@ -18,6 +18,12 @@ describe Sanitizer do
|
|
18
18
|
output.should == 'Eu & você como Vai'
|
19
19
|
end
|
20
20
|
|
21
|
+
it "should not break “ entities" do
|
22
|
+
html = "“ Testando"
|
23
|
+
output = Sanitizer.sanitize(html)
|
24
|
+
output.should == "“ Testando"
|
25
|
+
end
|
26
|
+
|
21
27
|
it "should clean spaces and tags" do
|
22
28
|
html = "<p>Oi <b>como</b>
|
23
29
|
Vai</p>"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
9
|
-
- 8
|
10
|
-
version: 0.1.8
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Marcelo Eden
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-06-29 00:00:00 -03:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|