rchardet 1.10.0 → 1.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rchardet/utf1632prober.rb +9 -17
- data/lib/rchardet/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b405d78aaa3c8148295596d77953a62baa922d06b0956b4bf728359410ddbc10
|
|
4
|
+
data.tar.gz: a8ccebaa1ac9bb0a024f5e3148a9b2f6372b4015524b1243e9e4660ae69443c2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 380367b048d49f2da30ad8bf9255eb5c84de22612d0a423d257d6bb6c2afc32ab0026022d137b33a415a3ac4d4739ba19a75a12c15f59fd6917d9f673fdfbc59
|
|
7
|
+
data.tar.gz: 2ef5940854bae008fa02c647eb2073984c217c2291328bab9fb20584e4d8c1cc7c2d21b0bbfa38937fd7a9cab21e329f9464a3101414bd752e2bf2d98ca1a4d2
|
|
data/lib/rchardet/utf1632prober.rb
CHANGED
|
@@ -169,17 +169,13 @@ module CharDet
|
|
|
169
169
|
!@invalid_utf16le
|
|
170
170
|
end
|
|
171
171
|
|
|
172
|
+
# Validate if the quad of bytes is valid UTF-32.
|
|
173
|
+
# UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
|
|
174
|
+
# excluding 0x0000D800 - 0x0000DFFF
|
|
175
|
+
# https://en.wikipedia.org/wiki/UTF-32
|
|
172
176
|
# @param [Array<Integer>] quad four consecutive bytes
|
|
173
177
|
# @return [void]
|
|
174
178
|
def validate_utf32_characters(quad)
|
|
175
|
-
"" "
|
|
176
|
-
Validate if the quad of bytes is valid UTF-32.
|
|
177
|
-
|
|
178
|
-
UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
|
|
179
|
-
excluding 0x0000D800 - 0x0000DFFF
|
|
180
|
-
|
|
181
|
-
https://en.wikipedia.org/wiki/UTF-32
|
|
182
|
-
" ""
|
|
183
179
|
if quad[0] != 0 or quad[1] > 0x10 or quad[0] == 0 and quad[1] == 0 and (0xD8..0xDF).include?(quad[2])
|
|
184
180
|
@invalid_utf32be = true
|
|
185
181
|
end
|
|
@@ -188,18 +184,14 @@ module CharDet
|
|
|
188
184
|
end
|
|
189
185
|
end
|
|
190
186
|
|
|
187
|
+
# Validate if the pair of bytes is valid UTF-16.
|
|
188
|
+
# UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
|
|
189
|
+
# with an exception for surrogate pairs, which must be in the range
|
|
190
|
+
# 0xD800-0xDBFF followed by 0xDC00-0xDFFF
|
|
191
|
+
# https://en.wikipedia.org/wiki/UTF-16
|
|
191
192
|
# @param [Array<Integer>] pair two consecutive bytes
|
|
192
193
|
# @return [void]
|
|
193
194
|
def validate_utf16_characters(pair)
|
|
194
|
-
"" "
|
|
195
|
-
Validate if the pair of bytes is valid UTF-16.
|
|
196
|
-
|
|
197
|
-
UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
|
|
198
|
-
with an exception for surrogate pairs, which must be in the range
|
|
199
|
-
0xD800-0xDBFF followed by 0xDC00-0xDFFF
|
|
200
|
-
|
|
201
|
-
https://en.wikipedia.org/wiki/UTF-16
|
|
202
|
-
" ""
|
|
203
195
|
if !@first_half_surrogate_pair_detected_16be
|
|
204
196
|
if (0xD8..0xDB).include? pair[0]
|
|
205
197
|
@first_half_surrogate_pair_detected_16be = true
|
data/lib/rchardet/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rchardet
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.10.0
|
|
4
|
+
version: 1.10.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Michael Grosser
|
|
8
8
|
- Jeff Hodges
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-05-28 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
email:
|
|
14
14
|
- michael@grosser.it
|