rchardet 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 3cdc02a9943abeba846fd618a5ee9f04fe8ef506
4
- data.tar.gz: edeeefd5700589aa5bb8cbc371a6cd580e2557fc
2
+ SHA256:
3
+ metadata.gz: b03009ca37e41a17ddc0f4f62cd5889b11b59b1f2100998f191a0be7ede5d000
4
+ data.tar.gz: ea8473dc5f1b7c4d24f858de90c591f1dec66055cc05b30b475e8256fa0c7a41
5
5
  SHA512:
6
- metadata.gz: facabfeae468c114f98ae3e6f38aedbf9c3a1f4393995b00d539903b67444a1124ed2a1307c56192263d6b28a9167766fd2fdd1c5eda2183379d37a679cb5a10
7
- data.tar.gz: 374d62ffda808bfd64639de216087162d2f117fd78cbc87fc9bcd05162e00c0e311638a20748c8a7033b7ad5320ff2ab8d2011843e4400992c23548911fabc49
6
+ metadata.gz: 7829042566b227306274d03219456cda42b8cd3767a085bec2dd72e0d6d0d4fa78a165fd911768599c2c255f5879a7c25d2cde74bee2091a70e5fbf2ca77e586
7
+ data.tar.gz: 249b2027502f888713c57bcae850178adcdf60db9463a5357b396b3a2d9fd76a65d9a12247f8cddf8bf4e877d1c1a427c05b0c040c3f329b604106775af85f27
@@ -33,8 +33,8 @@ require 'rchardet/euckrfreq'
33
33
  require 'rchardet/euckrprober'
34
34
  require 'rchardet/euctwfreq'
35
35
  require 'rchardet/euctwprober'
36
- require 'rchardet/gb2312freq'
37
- require 'rchardet/gb2312prober'
36
+ require 'rchardet/gb18030freq'
37
+ require 'rchardet/gb18030prober'
38
38
  require 'rchardet/hebrewprober'
39
39
  require 'rchardet/jisfreq'
40
40
  require 'rchardet/jpcntx'
@@ -149,16 +149,16 @@ module CharDet
149
149
  end
150
150
  end
151
151
 
152
- class GB2312DistributionAnalysis < CharDistributionAnalysis
152
+ class GB18030DistributionAnalysis < CharDistributionAnalysis
153
153
  def initialize
154
154
  super()
155
- @charToFreqOrder = GB2312CharToFreqOrder
156
- @tableSize = GB2312_TABLE_SIZE
157
- @typicalDistributionRatio = GB2312_TYPICAL_DISTRIBUTION_RATIO
155
+ @charToFreqOrder = GB18030CharToFreqOrder
156
+ @tableSize = GB18030_TABLE_SIZE
157
+ @typicalDistributionRatio = GB18030_TYPICAL_DISTRIBUTION_RATIO
158
158
  end
159
159
 
160
160
  def get_order(aStr)
161
- # for GB2312 encoding, we are interested
161
+ # for GB18030 encoding, we are interested
162
162
  # first byte range: 0xb0 -- 0xfe
163
163
  # second byte range: 0xa1 -- 0xfe
164
164
  # no validation needed here. State machine has done that
@@ -26,7 +26,7 @@
26
26
  # 02110-1301 USA
27
27
  ######################### END LICENSE BLOCK #########################
28
28
 
29
- # GB2312 most frequently used character table
29
+ # GB18030 most frequently used character table
30
30
  #
31
31
  # Char to FreqOrder table , from hz6763
32
32
 
@@ -41,11 +41,11 @@
41
41
  # Typical Distribution Ratio about 25% of Ideal one, still much higher that RDR
42
42
 
43
43
  module CharDet
44
- GB2312_TYPICAL_DISTRIBUTION_RATIO = 0.9
44
+ GB18030_TYPICAL_DISTRIBUTION_RATIO = 0.9
45
45
 
46
- GB2312_TABLE_SIZE = 3760
46
+ GB18030_TABLE_SIZE = 3760
47
47
 
48
- GB2312CharToFreqOrder = [
48
+ GB18030CharToFreqOrder = [
49
49
  1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205,
50
50
  2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842,
51
51
  2204, 869,4207, 970,2678,5626,2944,2956,1479,4048, 514,3595, 588,1346,2820,3409,
@@ -27,16 +27,16 @@
27
27
  ######################### END LICENSE BLOCK #########################
28
28
 
29
29
  module CharDet
30
- class GB2312Prober < MultiByteCharSetProber
30
+ class GB18030Prober < MultiByteCharSetProber
31
31
  def initialize
32
32
  super
33
- @codingSM = CodingStateMachine.new(GB2312SMModel)
34
- @distributionAnalyzer = GB2312DistributionAnalysis.new()
33
+ @codingSM = CodingStateMachine.new(GB18030SMModel)
34
+ @distributionAnalyzer = GB18030DistributionAnalysis.new()
35
35
  reset()
36
36
  end
37
37
 
38
38
  def get_charset_name
39
- return "GB2312"
39
+ return "GB18030"
40
40
  end
41
41
  end
42
42
  end
@@ -36,7 +36,7 @@ module CharDet
36
36
  UTF8Prober.new,
37
37
  SJISProber.new,
38
38
  EUCJPProber.new,
39
- GB2312Prober.new,
39
+ GB18030Prober.new,
40
40
  EUCKRProber.new,
41
41
  Big5Prober.new,
42
42
  EUCTWProber.new
@@ -239,9 +239,9 @@ module CharDet
239
239
  'name' => 'x-euc-tw'
240
240
  }.freeze
241
241
 
242
- # GB2312
242
+ # GB18030
243
243
 
244
- GB2312_cls = [
244
+ GB18030_cls = [
245
245
  1,1,1,1,1,1,1,1, # 00 - 07
246
246
  1,1,1,1,1,1,0,0, # 08 - 0f
247
247
  1,1,1,1,1,1,1,1, # 10 - 17
@@ -276,7 +276,7 @@ module CharDet
276
276
  6,6,6,6,6,6,6,0 # f8 - ff
277
277
  ].freeze
278
278
 
279
- GB2312_st = [
279
+ GB18030_st = [
280
280
  EError,EStart,EStart,EStart,EStart,EStart, 3,EError,#00-07
281
281
  EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,#08-0f
282
282
  EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,#10-17
@@ -290,13 +290,13 @@ module CharDet
290
290
  # it is used for frequency analysis only, and we are validing
291
291
  # each code range there as well. So it is safe to set it to be
292
292
  # 2 here.
293
- GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2].freeze
293
+ GB18030CharLenTable = [0, 1, 1, 1, 1, 1, 2].freeze
294
294
 
295
- GB2312SMModel = {'classTable' => GB2312_cls,
295
+ GB18030SMModel = {'classTable' => GB18030_cls,
296
296
  'classFactor' => 7,
297
- 'stateTable' => GB2312_st,
298
- 'charLenTable' => GB2312CharLenTable,
299
- 'name' => 'GB2312'
297
+ 'stateTable' => GB18030_st,
298
+ 'charLenTable' => GB18030CharLenTable,
299
+ 'name' => 'GB18030'
300
300
  }.freeze
301
301
 
302
302
  # Shift_JIS
@@ -1,3 +1,3 @@
1
1
  module CharDet
2
- VERSION = "1.7.0"
2
+ VERSION = "1.8.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rchardet
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Grosser
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-01-07 00:00:00.000000000 Z
12
+ date: 2018-06-01 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description:
15
15
  email:
@@ -34,8 +34,8 @@ files:
34
34
  - lib/rchardet/euckrprober.rb
35
35
  - lib/rchardet/euctwfreq.rb
36
36
  - lib/rchardet/euctwprober.rb
37
- - lib/rchardet/gb2312freq.rb
38
- - lib/rchardet/gb2312prober.rb
37
+ - lib/rchardet/gb18030freq.rb
38
+ - lib/rchardet/gb18030prober.rb
39
39
  - lib/rchardet/hebrewprober.rb
40
40
  - lib/rchardet/jisfreq.rb
41
41
  - lib/rchardet/jpcntx.rb
@@ -75,7 +75,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
75
  version: '0'
76
76
  requirements: []
77
77
  rubyforge_project:
78
- rubygems_version: 2.6.14
78
+ rubygems_version: 2.7.6
79
79
  signing_key:
80
80
  specification_version: 4
81
81
  summary: Character encoding auto-detection in Ruby. As smart as your browser. Open