rchardet 1.3.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. data/lib/rchardet.rb +1 -3
  2. data/lib/rchardet/big5freq.rb +2 -2
  3. data/lib/rchardet/big5prober.rb +2 -2
  4. data/lib/rchardet/chardistribution.rb +74 -69
  5. data/lib/rchardet/charsetgroupprober.rb +50 -52
  6. data/lib/rchardet/charsetprober.rb +2 -7
  7. data/lib/rchardet/codingstatemachine.rb +14 -13
  8. data/lib/rchardet/constants.rb +0 -0
  9. data/lib/rchardet/escprober.rb +34 -34
  10. data/lib/rchardet/escsm.rb +33 -32
  11. data/lib/rchardet/eucjpprober.rb +28 -28
  12. data/lib/rchardet/euckrfreq.rb +2 -1
  13. data/lib/rchardet/euckrprober.rb +2 -2
  14. data/lib/rchardet/euctwfreq.rb +2 -1
  15. data/lib/rchardet/euctwprober.rb +2 -2
  16. data/lib/rchardet/gb2312freq.rb +2 -2
  17. data/lib/rchardet/gb2312prober.rb +2 -2
  18. data/lib/rchardet/hebrewprober.rb +40 -40
  19. data/lib/rchardet/jisfreq.rb +2 -1
  20. data/lib/rchardet/jpcntx.rb +131 -130
  21. data/lib/rchardet/langbulgarianmodel.rb +6 -6
  22. data/lib/rchardet/langcyrillicmodel.rb +13 -13
  23. data/lib/rchardet/langgreekmodel.rb +5 -5
  24. data/lib/rchardet/langhebrewmodel.rb +3 -3
  25. data/lib/rchardet/langhungarianmodel.rb +5 -5
  26. data/lib/rchardet/langthaimodel.rb +3 -3
  27. data/lib/rchardet/latin1prober.rb +18 -18
  28. data/lib/rchardet/mbcharsetprober.rb +30 -30
  29. data/lib/rchardet/mbcsgroupprober.rb +9 -9
  30. data/lib/rchardet/mbcssm.rb +72 -72
  31. data/lib/rchardet/sbcharsetprober.rb +48 -50
  32. data/lib/rchardet/sbcsgroupprober.rb +16 -16
  33. data/lib/rchardet/sjisprober.rb +28 -28
  34. data/lib/rchardet/universaldetector.rb +92 -90
  35. data/lib/rchardet/utf8prober.rb +25 -25
  36. data/lib/rchardet/version.rb +3 -0
  37. metadata +30 -47
  38. data/COPYING +0 -504
  39. data/README +0 -12
@@ -51,7 +51,7 @@ module CharDet
51
51
  35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
52
52
  124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
53
53
  9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
54
- ]
54
+ ].freeze
55
55
 
56
56
  Win1253_CharToOrderMap = [
57
57
  255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
@@ -70,7 +70,7 @@ module CharDet
70
70
  35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
71
71
  124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
72
72
  9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
73
- ]
73
+ ].freeze
74
74
 
75
75
  # Model Table:
76
76
  # total sequences: 100%
@@ -207,7 +207,7 @@ module CharDet
207
207
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
208
208
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
209
209
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
210
- ]
210
+ ].freeze
211
211
 
212
212
  Latin7GreekModel = {
213
213
  'charToOrderMap' => Latin7_CharToOrderMap,
@@ -215,7 +215,7 @@ module CharDet
215
215
  'mTypicalPositiveRatio' => 0.982851,
216
216
  'keepEnglishLetter' => false,
217
217
  'charsetName' => "ISO-8859-7"
218
- }
218
+ }.freeze
219
219
 
220
220
  Win1253GreekModel = {
221
221
  'charToOrderMap' => Win1253_CharToOrderMap,
@@ -223,5 +223,5 @@ module CharDet
223
223
  'mTypicalPositiveRatio' => 0.982851,
224
224
  'keepEnglishLetter' => false,
225
225
  'charsetName' => "windows-1253"
226
- }
226
+ }.freeze
227
227
  end
@@ -53,7 +53,7 @@ Win1255_CharToOrderMap = [
53
53
  238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
54
54
  9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
55
55
  12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
56
- ]
56
+ ].freeze
57
57
 
58
58
  # Model Table:
59
59
  # total sequences: 100%
@@ -190,7 +190,7 @@ HebrewLangModel = [
190
190
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
191
191
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
192
192
  0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
193
- ]
193
+ ].freeze
194
194
 
195
195
  Win1255HebrewModel = {
196
196
  'charToOrderMap' => Win1255_CharToOrderMap,
@@ -198,5 +198,5 @@ Win1255HebrewModel = {
198
198
  'mTypicalPositiveRatio' => 0.984004,
199
199
  'keepEnglishLetter' => false,
200
200
  'charsetName' => "windows-1255"
201
- }
201
+ }.freeze
202
202
  end
@@ -50,7 +50,7 @@ Latin2_HungarianCharToOrderMap = [
50
50
  232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
51
51
  82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
52
52
  245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
53
- ]
53
+ ].freeze
54
54
 
55
55
  Win1250HungarianCharToOrderMap = [
56
56
  255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
@@ -69,7 +69,7 @@ Win1250HungarianCharToOrderMap = [
69
69
  232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
70
70
  84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
71
71
  245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
72
- ]
72
+ ].freeze
73
73
 
74
74
  # Model Table:
75
75
  # total sequences: 100%
@@ -206,7 +206,7 @@ HungarianLangModel = [
206
206
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
207
207
  1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
208
208
  0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
209
- ]
209
+ ].freeze
210
210
 
211
211
  Latin2HungarianModel = {
212
212
  'charToOrderMap' => Latin2_HungarianCharToOrderMap,
@@ -214,7 +214,7 @@ Latin2HungarianModel = {
214
214
  'mTypicalPositiveRatio' => 0.947368,
215
215
  'keepEnglishLetter' => true,
216
216
  'charsetName' => "ISO-8859-2"
217
- }
217
+ }.freeze
218
218
 
219
219
  Win1250HungarianModel = {
220
220
  'charToOrderMap' => Win1250HungarianCharToOrderMap,
@@ -222,5 +222,5 @@ Win1250HungarianModel = {
222
222
  'mTypicalPositiveRatio' => 0.947368,
223
223
  'keepEnglishLetter' => true,
224
224
  'charsetName' => "windows-1250"
225
- }
225
+ }.freeze
226
226
  end
@@ -52,7 +52,7 @@ TIS620CharToOrderMap = [
52
52
  22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
53
53
  11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
54
54
  68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
55
- ]
55
+ ].freeze
56
56
 
57
57
  # Model Table:
58
58
  # total sequences: 100%
@@ -189,7 +189,7 @@ ThaiLangModel = [
189
189
  0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
190
190
  2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
191
191
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
192
- ]
192
+ ].freeze
193
193
 
194
194
  TIS620ThaiModel = {
195
195
  'charToOrderMap' => TIS620CharToOrderMap,
@@ -197,5 +197,5 @@ TIS620ThaiModel = {
197
197
  'mTypicalPositiveRatio' => 0.926386,
198
198
  'keepEnglishLetter' => false,
199
199
  'charsetName' => "TIS-620"
200
- }
200
+ }.freeze
201
201
  end
@@ -73,7 +73,7 @@ module CharDet
73
73
  ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
74
74
  ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
75
75
  ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
76
- ]
76
+ ].freeze
77
77
 
78
78
  # 0 : illegal
79
79
  # 1 : very unlikely
@@ -89,7 +89,7 @@ module CharDet
89
89
  0, 3, 3, 3, 3, 3, 3, 3, # ACO
90
90
  0, 3, 1, 3, 1, 1, 1, 3, # ASV
91
91
  0, 3, 1, 3, 1, 1, 3, 3, # ASO
92
- ]
92
+ ].freeze
93
93
 
94
94
  class Latin1Prober < CharSetProber
95
95
  def initialize
@@ -98,8 +98,8 @@ module CharDet
98
98
  end
99
99
 
100
100
  def reset
101
- @_mLastCharClass = OTH
102
- @_mFreqCounter = [0] * FREQ_CAT_NUM
101
+ @lastCharClass = OTH
102
+ @freqCounter = [0] * FREQ_CAT_NUM
103
103
  super
104
104
  end
105
105
 
@@ -110,15 +110,15 @@ module CharDet
110
110
  def feed(aBuf)
111
111
  aBuf = filter_with_english_letters(aBuf)
112
112
  aBuf.each_byte do |b|
113
- c = b.chr
114
- charClass = Latin1_CharToClass[c[0]]
115
- freq = Latin1ClassModel[(@_mLastCharClass * CLASS_NUM) + charClass]
116
- if freq == 0
117
- @_mState = ENotMe
118
- break
119
- end
120
- @_mFreqCounter[freq] += 1
121
- @_mLastCharClass = charClass
113
+ c = b.chr
114
+ charClass = Latin1_CharToClass[c.bytes.first]
115
+ freq = Latin1ClassModel[(@lastCharClass * CLASS_NUM) + charClass]
116
+ if freq == 0
117
+ @state = ENotMe
118
+ break
119
+ end
120
+ @freqCounter[freq] += 1
121
+ @lastCharClass = charClass
122
122
  end
123
123
 
124
124
  return get_state()
@@ -126,17 +126,17 @@ module CharDet
126
126
 
127
127
  def get_confidence
128
128
  if get_state() == ENotMe
129
- return 0.01
129
+ return 0.01
130
130
  end
131
131
 
132
- total = @_mFreqCounter.inject{|a,b| a+b}
132
+ total = @freqCounter.inject{|a,b| a+b}
133
133
  if total < 0.01
134
- confidence = 0.0
134
+ confidence = 0.0
135
135
  else
136
- confidence = (@_mFreqCounter[3] / total) - (@_mFreqCounter[1] * 20.0 / total)
136
+ confidence = (@freqCounter[3] / total) - (@freqCounter[1] * 20.0 / total)
137
137
  end
138
138
  if confidence < 0.0
139
- confidence = 0.0
139
+ confidence = 0.0
140
140
  end
141
141
  # lower the confidence of latin1 so that other more accurate detector
142
142
  # can take priority.
@@ -32,20 +32,20 @@ module CharDet
32
32
  class MultiByteCharSetProber < CharSetProber
33
33
  def initialize
34
34
  super
35
- @_mDistributionAnalyzer = nil
36
- @_mCodingSM = nil
37
- @_mLastChar = "\x00\x00"
35
+ @distributionAnalyzer = nil
36
+ @codingSM = nil
37
+ @lastChar = "\x00\x00"
38
38
  end
39
39
 
40
40
  def reset
41
41
  super
42
- if @_mCodingSM
43
- @_mCodingSM.reset()
42
+ if @codingSM
43
+ @codingSM.reset()
44
44
  end
45
- if @_mDistributionAnalyzer
46
- @_mDistributionAnalyzer.reset()
45
+ if @distributionAnalyzer
46
+ @distributionAnalyzer.reset()
47
47
  end
48
- @_mLastChar = "\x00\x00"
48
+ @lastChar = "\x00\x00"
49
49
  end
50
50
 
51
51
  def get_charset_name
@@ -54,36 +54,36 @@ module CharDet
54
54
  def feed(aBuf)
55
55
  aLen = aBuf.length
56
56
  for i in (0...aLen)
57
- codingState = @_mCodingSM.next_state(aBuf[i..i])
58
- if codingState == EError
59
- $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
60
- @_mState = ENotMe
61
- break
62
- elsif codingState == EItsMe
63
- @_mState = EFoundIt
64
- break
65
- elsif codingState == EStart
66
- charLen = @_mCodingSM.get_current_charlen()
67
- if i == 0
68
- @_mLastChar[1] = aBuf[0..0]
69
- @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
70
- else
71
- @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
72
- end
73
- end
57
+ codingState = @codingSM.next_state(aBuf[i, 1])
58
+ if codingState == EError
59
+ $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
60
+ @state = ENotMe
61
+ break
62
+ elsif codingState == EItsMe
63
+ @state = EFoundIt
64
+ break
65
+ elsif codingState == EStart
66
+ charLen = @codingSM.get_current_charlen()
67
+ if i == 0
68
+ @lastChar[1] = aBuf[0, 1]
69
+ @distributionAnalyzer.feed(@lastChar, charLen)
70
+ else
71
+ @distributionAnalyzer.feed(aBuf[i-1, 2], charLen)
72
+ end
73
+ end
74
74
  end
75
- @_mLastChar[0] = aBuf[aLen-1..aLen-1]
75
+ @lastChar[0] = aBuf[aLen-1, 1]
76
76
 
77
77
  if get_state() == EDetecting
78
- if @_mDistributionAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
79
- @_mState = EFoundIt
80
- end
78
+ if @distributionAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
79
+ @state = EFoundIt
80
+ end
81
81
  end
82
82
  return get_state()
83
83
  end
84
84
 
85
85
  def get_confidence
86
- return @_mDistributionAnalyzer.get_confidence()
86
+ return @distributionAnalyzer.get_confidence()
87
87
  end
88
88
  end
89
89
  end
@@ -32,15 +32,15 @@ module CharDet
32
32
  class MBCSGroupProber < CharSetGroupProber
33
33
  def initialize
34
34
  super
35
- @_mProbers = [
36
- UTF8Prober.new,
37
- SJISProber.new,
38
- EUCJPProber.new,
39
- GB2312Prober.new,
40
- EUCKRProber.new,
41
- Big5Prober.new,
42
- EUCTWProber.new
43
- ]
35
+ @probers = [
36
+ UTF8Prober.new,
37
+ SJISProber.new,
38
+ EUCJPProber.new,
39
+ GB2312Prober.new,
40
+ EUCKRProber.new,
41
+ Big5Prober.new,
42
+ EUCTWProber.new
43
+ ]
44
44
  reset()
45
45
  end
46
46
  end
@@ -62,22 +62,22 @@ module CharDet
62
62
  3,3,3,3,3,3,3,3, # e8 - ef
63
63
  3,3,3,3,3,3,3,3, # f0 - f7
64
64
  3,3,3,3,3,3,3,0 # f8 - ff
65
- ]
65
+ ].freeze
66
66
 
67
67
  BIG5_st = [
68
68
  EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
69
69
  EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,#08-0f
70
70
  EError,EStart,EStart,EStart,EStart,EStart,EStart,EStart #10-17
71
- ]
71
+ ].freeze
72
72
 
73
- Big5CharLenTable = [0, 1, 1, 2, 0]
73
+ Big5CharLenTable = [0, 1, 1, 2, 0].freeze
74
74
 
75
75
  Big5SMModel = {'classTable' => BIG5_cls,
76
- 'classFactor' => 5,
77
- 'stateTable' => BIG5_st,
78
- 'charLenTable' => Big5CharLenTable,
79
- 'name' => 'Big5'
80
- }
76
+ 'classFactor' => 5,
77
+ 'stateTable' => BIG5_st,
78
+ 'charLenTable' => Big5CharLenTable,
79
+ 'name' => 'Big5'
80
+ }.freeze
81
81
 
82
82
  # EUC-JP
83
83
 
@@ -114,7 +114,7 @@ module CharDet
114
114
  0,0,0,0,0,0,0,0, # e8 - ef
115
115
  0,0,0,0,0,0,0,0, # f0 - f7
116
116
  0,0,0,0,0,0,0,5 # f8 - ff
117
- ]
117
+ ].freeze
118
118
 
119
119
  EUCJP_st = [
120
120
  3, 4, 3, 5,EStart,EError,EError,EError,#00-07
@@ -122,16 +122,16 @@ module CharDet
122
122
  EItsMe,EItsMe,EStart,EError,EStart,EError,EError,EError,#10-17
123
123
  EError,EError,EStart,EError,EError,EError, 3,EError,#18-1f
124
124
  3,EError,EError,EError,EStart,EStart,EStart,EStart #20-27
125
- ]
125
+ ].freeze
126
126
 
127
- EUCJPCharLenTable = [2, 2, 2, 3, 1, 0]
127
+ EUCJPCharLenTable = [2, 2, 2, 3, 1, 0].freeze
128
128
 
129
129
  EUCJPSMModel = {'classTable' => EUCJP_cls,
130
- 'classFactor' => 6,
131
- 'stateTable' => EUCJP_st,
132
- 'charLenTable' => EUCJPCharLenTable,
133
- 'name' => 'EUC-JP'
134
- }
130
+ 'classFactor' => 6,
131
+ 'stateTable' => EUCJP_st,
132
+ 'charLenTable' => EUCJPCharLenTable,
133
+ 'name' => 'EUC-JP'
134
+ }.freeze
135
135
 
136
136
  # EUC-KR
137
137
 
@@ -168,21 +168,21 @@ module CharDet
168
168
  2,2,2,2,2,2,2,2, # e8 - ef
169
169
  2,2,2,2,2,2,2,2, # f0 - f7
170
170
  2,2,2,2,2,2,2,0 # f8 - ff
171
- ]
171
+ ].freeze
172
172
 
173
173
  EUCKR_st = [
174
174
  EError,EStart, 3,EError,EError,EError,EError,EError,#00-07
175
175
  EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,EStart#08-0f
176
- ]
176
+ ].freeze
177
177
 
178
- EUCKRCharLenTable = [0, 1, 2, 0]
178
+ EUCKRCharLenTable = [0, 1, 2, 0].freeze
179
179
 
180
180
  EUCKRSMModel = {'classTable' => EUCKR_cls,
181
- 'classFactor' => 4,
182
- 'stateTable' => EUCKR_st,
183
- 'charLenTable' => EUCKRCharLenTable,
184
- 'name' => 'EUC-KR'
185
- }
181
+ 'classFactor' => 4,
182
+ 'stateTable' => EUCKR_st,
183
+ 'charLenTable' => EUCKRCharLenTable,
184
+ 'name' => 'EUC-KR'
185
+ }.freeze
186
186
 
187
187
  # EUC-TW
188
188
 
@@ -219,7 +219,7 @@ module CharDet
219
219
  3,3,3,3,3,3,3,3, # e8 - ef
220
220
  3,3,3,3,3,3,3,3, # f0 - f7
221
221
  3,3,3,3,3,3,3,0 # f8 - ff
222
- ]
222
+ ].freeze
223
223
 
224
224
  EUCTW_st = [
225
225
  EError,EError,EStart, 3, 3, 3, 4,EError,#00-07
@@ -228,16 +228,16 @@ module CharDet
228
228
  EStart,EStart,EStart,EError,EError,EError,EError,EError,#18-1f
229
229
  5,EError,EError,EError,EStart,EError,EStart,EStart,#20-27
230
230
  EStart,EError,EStart,EStart,EStart,EStart,EStart,EStart #28-2f
231
- ]
231
+ ].freeze
232
232
 
233
- EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3]
233
+ EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3].freeze
234
234
 
235
235
  EUCTWSMModel = {'classTable' => EUCTW_cls,
236
- 'classFactor' => 7,
237
- 'stateTable' => EUCTW_st,
238
- 'charLenTable' => EUCTWCharLenTable,
239
- 'name' => 'x-euc-tw'
240
- }
236
+ 'classFactor' => 7,
237
+ 'stateTable' => EUCTW_st,
238
+ 'charLenTable' => EUCTWCharLenTable,
239
+ 'name' => 'x-euc-tw'
240
+ }.freeze
241
241
 
242
242
  # GB2312
243
243
 
@@ -274,7 +274,7 @@ module CharDet
274
274
  6,6,6,6,6,6,6,6, # e8 - ef
275
275
  6,6,6,6,6,6,6,6, # f0 - f7
276
276
  6,6,6,6,6,6,6,0 # f8 - ff
277
- ]
277
+ ].freeze
278
278
 
279
279
  GB2312_st = [
280
280
  EError,EStart,EStart,EStart,EStart,EStart, 3,EError,#00-07
@@ -283,21 +283,21 @@ module CharDet
283
283
  4,EError,EStart,EStart,EError,EError,EError,EError,#18-1f
284
284
  EError,EError, 5,EError,EError,EError,EItsMe,EError,#20-27
285
285
  EError,EError,EStart,EStart,EStart,EStart,EStart,EStart#28-2f
286
- ]
286
+ ].freeze
287
287
 
288
288
  # To be accurate, the length of class 6 can be either 2 or 4.
289
289
  # But it is not necessary to discriminate between the two since
290
290
  # it is used for frequency analysis only, and we are validating
291
291
  # each code range there as well. So it is safe to set it to be
292
292
  # 2 here.
293
- GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2]
293
+ GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2].freeze
294
294
 
295
295
  GB2312SMModel = {'classTable' => GB2312_cls,
296
- 'classFactor' => 7,
297
- 'stateTable' => GB2312_st,
298
- 'charLenTable' => GB2312CharLenTable,
299
- 'name' => 'GB2312'
300
- }
296
+ 'classFactor' => 7,
297
+ 'stateTable' => GB2312_st,
298
+ 'charLenTable' => GB2312CharLenTable,
299
+ 'name' => 'GB2312'
300
+ }.freeze
301
301
 
302
302
  # Shift_JIS
303
303
 
@@ -336,22 +336,22 @@ module CharDet
336
336
  3,3,3,3,3,4,4,4, # e8 - ef
337
337
  4,4,4,4,4,4,4,4, # f0 - f7
338
338
  4,4,4,4,4,0,0,0 # f8 - ff
339
- ]
339
+ ].freeze
340
340
 
341
341
  SJIS_st = [
342
342
  EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
343
343
  EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
344
344
  EItsMe,EItsMe,EError,EError,EStart,EStart,EStart,EStart#10-17
345
- ]
345
+ ].freeze
346
346
 
347
- SJISCharLenTable = [0, 1, 1, 2, 0, 0]
347
+ SJISCharLenTable = [0, 1, 1, 2, 0, 0].freeze
348
348
 
349
349
  SJISSMModel = {'classTable' => SJIS_cls,
350
- 'classFactor' => 6,
351
- 'stateTable' => SJIS_st,
352
- 'charLenTable' => SJISCharLenTable,
353
- 'name' => 'Shift_JIS'
354
- }
350
+ 'classFactor' => 6,
351
+ 'stateTable' => SJIS_st,
352
+ 'charLenTable' => SJISCharLenTable,
353
+ 'name' => 'Shift_JIS'
354
+ }.freeze
355
355
 
356
356
  # UCS2-BE
357
357
 
@@ -388,7 +388,7 @@ module CharDet
388
388
  0,0,0,0,0,0,0,0, # e8 - ef
389
389
  0,0,0,0,0,0,0,0, # f0 - f7
390
390
  0,0,0,0,0,0,4,5 # f8 - ff
391
- ]
391
+ ].freeze
392
392
 
393
393
  UCS2BE_st = [
394
394
  5, 7, 7,EError, 4, 3,EError,EError,#00-07
@@ -398,16 +398,16 @@ module CharDet
398
398
  6, 6, 6, 6, 5, 7, 7,EError,#20-27
399
399
  5, 8, 6, 6,EError, 6, 6, 6,#28-2f
400
400
  6, 6, 6, 6,EError,EError,EStart,EStart#30-37
401
- ]
401
+ ].freeze
402
402
 
403
- UCS2BECharLenTable = [2, 2, 2, 0, 2, 2]
403
+ UCS2BECharLenTable = [2, 2, 2, 0, 2, 2].freeze
404
404
 
405
405
  UCS2BESMModel = {'classTable' => UCS2BE_cls,
406
- 'classFactor' => 6,
407
- 'stateTable' => UCS2BE_st,
408
- 'charLenTable' => UCS2BECharLenTable,
409
- 'name' => 'UTF-16BE'
410
- }
406
+ 'classFactor' => 6,
407
+ 'stateTable' => UCS2BE_st,
408
+ 'charLenTable' => UCS2BECharLenTable,
409
+ 'name' => 'UTF-16BE'
410
+ }.freeze
411
411
 
412
412
  # UCS2-LE
413
413
 
@@ -444,7 +444,7 @@ module CharDet
444
444
  0,0,0,0,0,0,0,0, # e8 - ef
445
445
  0,0,0,0,0,0,0,0, # f0 - f7
446
446
  0,0,0,0,0,0,4,5 # f8 - ff
447
- ]
447
+ ].freeze
448
448
 
449
449
  UCS2LE_st = [
450
450
  6, 6, 7, 6, 4, 3,EError,EError,#00-07
@@ -454,16 +454,16 @@ module CharDet
454
454
  7, 6, 8, 8, 5, 5, 5,EError,#20-27
455
455
  5, 5, 5,EError,EError,EError, 5, 5,#28-2f
456
456
  5, 5, 5,EError, 5,EError,EStart,EStart#30-37
457
- ]
457
+ ].freeze
458
458
 
459
- UCS2LECharLenTable = [2, 2, 2, 2, 2, 2]
459
+ UCS2LECharLenTable = [2, 2, 2, 2, 2, 2].freeze
460
460
 
461
461
  UCS2LESMModel = {'classTable' => UCS2LE_cls,
462
- 'classFactor' => 6,
463
- 'stateTable' => UCS2LE_st,
464
- 'charLenTable' => UCS2LECharLenTable,
465
- 'name' => 'UTF-16LE'
466
- }
462
+ 'classFactor' => 6,
463
+ 'stateTable' => UCS2LE_st,
464
+ 'charLenTable' => UCS2LECharLenTable,
465
+ 'name' => 'UTF-16LE'
466
+ }.freeze
467
467
 
468
468
  # UTF-8
469
469
 
@@ -500,7 +500,7 @@ module CharDet
500
500
  8,8,8,8,8,9,8,8, # e8 - ef
501
501
  10,11,11,11,11,11,11,11, # f0 - f7
502
502
  12,13,13,13,14,15,0,0 # f8 - ff
503
- ]
503
+ ].freeze
504
504
 
505
505
  UTF8_st = [
506
506
  EError,EStart,EError,EError,EError,EError, 12, 10,#00-07
@@ -529,14 +529,14 @@ module CharDet
529
529
  EError,EError,EError,EError,EError,EError,EError,EError,#b8-bf
530
530
  EError,EError,EStart,EStart,EStart,EStart,EError,EError,#c0-c7
531
531
  EError,EError,EError,EError,EError,EError,EError,EError#c8-cf
532
- ]
532
+ ].freeze
533
533
 
534
- UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6]
534
+ UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6].freeze
535
535
 
536
536
  UTF8SMModel = {'classTable' => UTF8_cls,
537
- 'classFactor' => 16,
538
- 'stateTable' => UTF8_st,
539
- 'charLenTable' => UTF8CharLenTable,
540
- 'name' => 'UTF-8'
541
- }
537
+ 'classFactor' => 16,
538
+ 'stateTable' => UTF8_st,
539
+ 'charLenTable' => UTF8CharLenTable,
540
+ 'name' => 'UTF-8'
541
+ }.freeze
542
542
  end