sanitize 4.6.6 → 5.2.1

Sign up for free protection for your applications and access to all features.

Potentially problematic release.


This version of sanitize might be problematic. Click here for more details.

data/test/test_unicode.rb DELETED
@@ -1,95 +0,0 @@
1
- # encoding: utf-8
2
- require_relative 'common'
3
-
4
- describe 'Unicode' do
5
- make_my_diffs_pretty!
6
- parallelize_me!
7
-
8
- # http://www.w3.org/TR/unicode-xml/#Charlist
9
- describe 'Unsuitable characters' do
10
- before do
11
- @s = Sanitize.new(Sanitize::Config::RELAXED)
12
- end
13
-
14
- it 'should not modify the input string' do
15
- fragment = "a\u0340b\u0341c"
16
- document = "a\u0340b\u0341c"
17
-
18
- @s.document(document)
19
- @s.fragment(fragment)
20
-
21
- fragment.must_equal "a\u0340b\u0341c"
22
- document.must_equal "a\u0340b\u0341c"
23
- end
24
-
25
- it 'should strip deprecated grave and acute clones' do
26
- @s.document("a\u0340b\u0341c").must_equal "<html><head></head><body>abc</body></html>\n"
27
- @s.fragment("a\u0340b\u0341c").must_equal 'abc'
28
- end
29
-
30
- it 'should strip deprecated Khmer characters' do
31
- @s.document("a\u17a3b\u17d3c").must_equal "<html><head></head><body>abc</body></html>\n"
32
- @s.fragment("a\u17a3b\u17d3c").must_equal 'abc'
33
- end
34
-
35
- it 'should strip line and paragraph separator punctuation' do
36
- @s.document("a\u2028b\u2029c").must_equal "<html><head></head><body>abc</body></html>\n"
37
- @s.fragment("a\u2028b\u2029c").must_equal 'abc'
38
- end
39
-
40
- it 'should strip bidi embedding control characters' do
41
- @s.document("a\u202ab\u202bc\u202cd\u202de\u202e")
42
- .must_equal "<html><head></head><body>abcde</body></html>\n"
43
-
44
- @s.fragment("a\u202ab\u202bc\u202cd\u202de\u202e")
45
- .must_equal 'abcde'
46
- end
47
-
48
- it 'should strip deprecated symmetric swapping characters' do
49
- @s.document("a\u206ab\u206bc").must_equal "<html><head></head><body>abc</body></html>\n"
50
- @s.fragment("a\u206ab\u206bc").must_equal 'abc'
51
- end
52
-
53
- it 'should strip deprecated Arabic form shaping characters' do
54
- @s.document("a\u206cb\u206dc").must_equal "<html><head></head><body>abc</body></html>\n"
55
- @s.fragment("a\u206cb\u206dc").must_equal 'abc'
56
- end
57
-
58
- it 'should strip deprecated National digit shape characters' do
59
- @s.document("a\u206eb\u206fc").must_equal "<html><head></head><body>abc</body></html>\n"
60
- @s.fragment("a\u206eb\u206fc").must_equal 'abc'
61
- end
62
-
63
- it 'should strip interlinear annotation characters' do
64
- @s.document("a\ufff9b\ufffac\ufffb").must_equal "<html><head></head><body>abc</body></html>\n"
65
- @s.fragment("a\ufff9b\ufffac\ufffb").must_equal 'abc'
66
- end
67
-
68
- it 'should strip BOM/zero-width non-breaking space characters' do
69
- @s.document("a\ufeffbc").must_equal "<html><head></head><body>abc</body></html>\n"
70
- @s.fragment("a\ufeffbc").must_equal 'abc'
71
- end
72
-
73
- it 'should strip object replacement characters' do
74
- @s.document("a\ufffcbc").must_equal "<html><head></head><body>abc</body></html>\n"
75
- @s.fragment("a\ufffcbc").must_equal 'abc'
76
- end
77
-
78
- it 'should strip musical notation scoping characters' do
79
- @s.document("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}")
80
- .must_equal "<html><head></head><body>abcdefgh</body></html>\n"
81
-
82
- @s.fragment("a\u{1d173}b\u{1d174}c\u{1d175}d\u{1d176}e\u{1d177}f\u{1d178}g\u{1d179}h\u{1d17a}")
83
- .must_equal 'abcdefgh'
84
- end
85
-
86
- it 'should strip language tag code point characters' do
87
- str = String.new 'a'
88
- (0xE0000..0xE007F).each {|n| str << [n].pack('U') }
89
- str << 'b'
90
-
91
- @s.document(str).must_equal "<html><head></head><body>ab</body></html>\n"
92
- @s.fragment(str).must_equal 'ab'
93
- end
94
- end
95
- end