rchardet 1.7.0 → 1.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 3cdc02a9943abeba846fd618a5ee9f04fe8ef506
4
- data.tar.gz: edeeefd5700589aa5bb8cbc371a6cd580e2557fc
2
+ SHA256:
3
+ metadata.gz: b03009ca37e41a17ddc0f4f62cd5889b11b59b1f2100998f191a0be7ede5d000
4
+ data.tar.gz: ea8473dc5f1b7c4d24f858de90c591f1dec66055cc05b30b475e8256fa0c7a41
5
5
  SHA512:
6
- metadata.gz: facabfeae468c114f98ae3e6f38aedbf9c3a1f4393995b00d539903b67444a1124ed2a1307c56192263d6b28a9167766fd2fdd1c5eda2183379d37a679cb5a10
7
- data.tar.gz: 374d62ffda808bfd64639de216087162d2f117fd78cbc87fc9bcd05162e00c0e311638a20748c8a7033b7ad5320ff2ab8d2011843e4400992c23548911fabc49
6
+ metadata.gz: 7829042566b227306274d03219456cda42b8cd3767a085bec2dd72e0d6d0d4fa78a165fd911768599c2c255f5879a7c25d2cde74bee2091a70e5fbf2ca77e586
7
+ data.tar.gz: 249b2027502f888713c57bcae850178adcdf60db9463a5357b396b3a2d9fd76a65d9a12247f8cddf8bf4e877d1c1a427c05b0c040c3f329b604106775af85f27
@@ -33,8 +33,8 @@ require 'rchardet/euckrfreq'
33
33
  require 'rchardet/euckrprober'
34
34
  require 'rchardet/euctwfreq'
35
35
  require 'rchardet/euctwprober'
36
- require 'rchardet/gb2312freq'
37
- require 'rchardet/gb2312prober'
36
+ require 'rchardet/gb18030freq'
37
+ require 'rchardet/gb18030prober'
38
38
  require 'rchardet/hebrewprober'
39
39
  require 'rchardet/jisfreq'
40
40
  require 'rchardet/jpcntx'
@@ -149,16 +149,16 @@ module CharDet
149
149
  end
150
150
  end
151
151
 
152
- class GB2312DistributionAnalysis < CharDistributionAnalysis
152
+ class GB18030DistributionAnalysis < CharDistributionAnalysis
153
153
  def initialize
154
154
  super()
155
- @charToFreqOrder = GB2312CharToFreqOrder
156
- @tableSize = GB2312_TABLE_SIZE
157
- @typicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
155
+ @charToFreqOrder = GB18030CharToFreqOrder
156
+ @tableSize = GB18030_TABLE_SIZE
157
+ @typicalDistributionRatio = GB18030_TYPICAL_DISTRIBUTION_RATIO
158
158
  end
159
159
 
160
160
  def get_order(aStr)
161
- # for GB2312 encoding, we are interested
161
+ # for GB18030 encoding, we are interested
162
162
  # first byte range: 0xb0 -- 0xfe
163
163
  # second byte range: 0xa1 -- 0xfe
164
164
  # no validation needed here. State machine has done that
@@ -26,7 +26,7 @@
26
26
  # 02110-1301 USA
27
27
  ######################### END LICENSE BLOCK #########################
28
28
 
29
- # GB2312 most frequently used character table
29
+ # GB18030 most frequently used character table
30
30
  #
31
31
  # Char to FreqOrder table , from hz6763
32
32
 
@@ -41,11 +41,11 @@
41
41
  # Typical Distribution Ratio about 25% of Ideal one, still much higher than RDR
42
42
 
43
43
  module CharDet
44
- GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
44
+ GB18030_TYPICAL_DISTRIBUTION_RATIO = 0.9
45
45
 
46
- GB2312_TABLE_SIZE = 3760
46
+ GB18030_TABLE_SIZE = 3760
47
47
 
48
- GB2312CharToFreqOrder = [
48
+ GB18030CharToFreqOrder = [
49
49
  1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
50
50
  2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
51
51
  2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
@@ -27,16 +27,16 @@
27
27
  ######################### END LICENSE BLOCK #########################
28
28
 
29
29
  module CharDet
30
- class GB2312Prober < MultiByteCharSetProber
30
+ class GB18030Prober < MultiByteCharSetProber
31
31
  def initialize
32
32
  super
33
- @codingSM = CodingStateMachine.new(GB2312SMModel)
34
- @distributionAnalyzer = GB2312DistributionAnalysis.new()
33
+ @codingSM = CodingStateMachine.new(GB18030SMModel)
34
+ @distributionAnalyzer = GB18030DistributionAnalysis.new()
35
35
  reset()
36
36
  end
37
37
 
38
38
  def get_charset_name
39
- return "GB2312"
39
+ return "GB18030"
40
40
  end
41
41
  end
42
42
  end
@@ -36,7 +36,7 @@ module CharDet
36
36
  UTF8Prober.new,
37
37
  SJISProber.new,
38
38
  EUCJPProber.new,
39
- GB2312Prober.new,
39
+ GB18030Prober.new,
40
40
  EUCKRProber.new,
41
41
  Big5Prober.new,
42
42
  EUCTWProber.new
@@ -239,9 +239,9 @@ module CharDet
239
239
  'name' => 'x-euc-tw'
240
240
  }.freeze
241
241
 
242
- # GB2312
242
+ # GB18030
243
243
 
244
- GB2312_cls = [
244
+ GB18030_cls = [
245
245
  1,1,1,1,1,1,1,1, # 00 - 07
246
246
  1,1,1,1,1,1,0,0, # 08 - 0f
247
247
  1,1,1,1,1,1,1,1, # 10 - 17
@@ -276,7 +276,7 @@ module CharDet
276
276
  6,6,6,6,6,6,6,0 # f8 - ff
277
277
  ].freeze
278
278
 
279
- GB2312_st = [
279
+ GB18030_st = [
280
280
  EError,EStart,EStart,EStart,EStart,EStart, 3,EError,#00-07
281
281
  EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
282
282
  EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,#10-17
@@ -290,13 +290,13 @@ module CharDet
290
290
  # it is used for frequency analysis only, and we are validating
291
291
  # each code range there as well. So it is safe to set it to be
292
292
  # 2 here.
293
- GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2].freeze
293
+ GB18030CharLenTable = [0, 1, 1, 1, 1, 1, 2].freeze
294
294
 
295
- GB2312SMModel = {'classTable' => GB2312_cls,
295
+ GB18030SMModel = {'classTable' => GB18030_cls,
296
296
  'classFactor' => 7,
297
- 'stateTable' => GB2312_st,
298
- 'charLenTable' => GB2312CharLenTable,
299
- 'name' => 'GB2312'
297
+ 'stateTable' => GB18030_st,
298
+ 'charLenTable' => GB18030CharLenTable,
299
+ 'name' => 'GB18030'
300
300
  }.freeze
301
301
 
302
302
  # Shift_JIS
@@ -1,3 +1,3 @@
1
1
  module CharDet
2
- VERSION = "1.7.0"
2
+ VERSION = "1.8.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rchardet
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Grosser
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-01-07 00:00:00.000000000 Z
12
+ date: 2018-06-01 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description:
15
15
  email:
@@ -34,8 +34,8 @@ files:
34
34
  - lib/rchardet/euckrprober.rb
35
35
  - lib/rchardet/euctwfreq.rb
36
36
  - lib/rchardet/euctwprober.rb
37
- - lib/rchardet/gb2312freq.rb
38
- - lib/rchardet/gb2312prober.rb
37
+ - lib/rchardet/gb18030freq.rb
38
+ - lib/rchardet/gb18030prober.rb
39
39
  - lib/rchardet/hebrewprober.rb
40
40
  - lib/rchardet/jisfreq.rb
41
41
  - lib/rchardet/jpcntx.rb
@@ -75,7 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
75
  version: '0'
76
76
  requirements: []
77
77
  rubyforge_project:
78
- rubygems_version: 2.6.14
78
+ rubygems_version: 2.7.6
79
79
  signing_key:
80
80
  specification_version: 4
81
81
  summary: Character encoding auto-detection in Ruby. As smart as your browser. Open