sanitize 5.0.0 → 5.2.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sanitize might be problematic. Click here for more details.

@@ -1,95 +0,0 @@
1
- # encoding: utf-8
2
- require_relative 'common'
3
-
4
- describe 'Unicode' do
5
- make_my_diffs_pretty!
6
- parallelize_me!
7
-
8
- # http://www.w3.org/TR/unicode-xml/#Charlist
9
- describe 'Unsuitable characters' do
10
- before do
11
- @s = Sanitize.new(Sanitize::Config::RELAXED)
12
- end
13
-
14
- it 'should not modify the input string' do
15
- fragment = "a\u0340b\u0341c"
16
- document = "a\u0340b\u0341c"
17
-
18
- @s.document(document)
19
- @s.fragment(fragment)
20
-
21
- fragment.must_equal "a\u0340b\u0341c"
22
- document.must_equal "a\u0340b\u0341c"
23
- end
24
-
25
- it 'should strip deprecated grave and acute clones' do
26
- @s.document("a\u0340b\u0341c").must_equal "<html><head></head><body>abc</body></html>"
27
- @s.fragment("a\u0340b\u0341c").must_equal 'abc'
28
- end
29
-
30
- it 'should strip deprecated Khmer characters' do
31
- @s.document("a\u17a3b\u17d3c").must_equal "<html><head></head><body>abc</body></html>"
32
- @s.fragment("a\u17a3b\u17d3c").must_equal 'abc'
33
- end
34
-
35
- it 'should strip line and paragraph separator punctuation' do
36
- @s.document("a\u2028b\u2029c").must_equal "<html><head></head><body>abc</body></html>"
37
- @s.fragment("a\u2028b\u2029c").must_equal 'abc'
38
- end
39
-
40
- it 'should strip bidi embedding control characters' do
41
- @s.document("a\u202ab\u202bc\u202cd\u202de\u202e")
42
- .must_equal "<html><head></head><body>abcde</body></html>"
43
-
44
- @s.fragment("a\u202ab\u202bc\u202cd\u202de\u202e")
45
- .must_equal 'abcde'
46
- end
47
-
48
- it 'should strip deprecated symmetric swapping characters' do
49
- @s.document("a\u206ab\u206bc").must_equal "<html><head></head><body>abc</body></html>"
50
- @s.fragment("a\u206ab\u206bc").must_equal 'abc'
51
- end
52
-
53
- it 'should strip deprecated Arabic form shaping characters' do
54
- @s.document("a\u206cb\u206dc").must_equal "<html><head></head><body>abc</body></html>"
55
- @s.fragment("a\u206cb\u206dc").must_equal 'abc'
56
- end
57
-
58
- it 'should strip deprecated National digit shape characters' do
59
- @s.document("a\u206eb\u206fc").must_equal "<html><head></head><body>abc</body></html>"
60
- @s.fragment("a\u206eb\u206fc").must_equal 'abc'
61
- end
62
-
63
- it 'should strip interlinear annotation characters' do
64
- @s.document("a\ufff9b\ufffac\ufffb").must_equal "<html><head></head><body>abc</body></html>"
65
- @s.fragment("a\ufff9b\ufffac\ufffb").must_equal 'abc'
66
- end
67
-
68
- it 'should strip BOM/zero-width non-breaking space characters' do
69
- @s.document("a\ufeffbc").must_equal "<html><head></head><body>abc</body></html>"
70
- @s.fragment("a\ufeffbc").must_equal 'abc'
71
- end
72
-
73
- it 'should strip object replacement characters' do
74
- @s.document("a\ufffcbc").must_equal "<html><head></head><body>abc</body></html>"
75
- @s.fragment("a\ufffcbc").must_equal 'abc'
76
- end
77
-
78
- it 'should strip musical notation scoping characters' do
79
- @s.document("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}")
80
- .must_equal "<html><head></head><body>abcdefgh</body></html>"
81
-
82
- @s.fragment("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}")
83
- .must_equal 'abcdefgh'
84
- end
85
-
86
- it 'should strip language tag code point characters' do
87
- str = String.new 'a'
88
- (0xE0000..0xE007F).each {|n| str << [n].pack('U') }
89
- str << 'b'
90
-
91
- @s.document(str).must_equal "<html><head></head><body>ab</body></html>"
92
- @s.fragment(str).must_equal 'ab'
93
- end
94
- end
95
- end