rchardet 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. data/lib/rchardet.rb +1 -3
  2. data/lib/rchardet/big5freq.rb +2 -2
  3. data/lib/rchardet/big5prober.rb +2 -2
  4. data/lib/rchardet/chardistribution.rb +74 -69
  5. data/lib/rchardet/charsetgroupprober.rb +50 -52
  6. data/lib/rchardet/charsetprober.rb +2 -7
  7. data/lib/rchardet/codingstatemachine.rb +14 -13
  8. data/lib/rchardet/constants.rb +0 -0
  9. data/lib/rchardet/escprober.rb +34 -34
  10. data/lib/rchardet/escsm.rb +33 -32
  11. data/lib/rchardet/eucjpprober.rb +28 -28
  12. data/lib/rchardet/euckrfreq.rb +2 -1
  13. data/lib/rchardet/euckrprober.rb +2 -2
  14. data/lib/rchardet/euctwfreq.rb +2 -1
  15. data/lib/rchardet/euctwprober.rb +2 -2
  16. data/lib/rchardet/gb2312freq.rb +2 -2
  17. data/lib/rchardet/gb2312prober.rb +2 -2
  18. data/lib/rchardet/hebrewprober.rb +40 -40
  19. data/lib/rchardet/jisfreq.rb +2 -1
  20. data/lib/rchardet/jpcntx.rb +131 -130
  21. data/lib/rchardet/langbulgarianmodel.rb +6 -6
  22. data/lib/rchardet/langcyrillicmodel.rb +13 -13
  23. data/lib/rchardet/langgreekmodel.rb +5 -5
  24. data/lib/rchardet/langhebrewmodel.rb +3 -3
  25. data/lib/rchardet/langhungarianmodel.rb +5 -5
  26. data/lib/rchardet/langthaimodel.rb +3 -3
  27. data/lib/rchardet/latin1prober.rb +18 -18
  28. data/lib/rchardet/mbcharsetprober.rb +30 -30
  29. data/lib/rchardet/mbcsgroupprober.rb +9 -9
  30. data/lib/rchardet/mbcssm.rb +72 -72
  31. data/lib/rchardet/sbcharsetprober.rb +48 -50
  32. data/lib/rchardet/sbcsgroupprober.rb +16 -16
  33. data/lib/rchardet/sjisprober.rb +28 -28
  34. data/lib/rchardet/universaldetector.rb +92 -90
  35. data/lib/rchardet/utf8prober.rb +25 -25
  36. data/lib/rchardet/version.rb +3 -0
  37. metadata +30 -47
  38. data/COPYING +0 -504
  39. data/README +0 -12
@@ -51,7 +51,7 @@ module CharDet
51
51
  35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
52
52
  124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
53
53
  9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
54
- ]
54
+ ].freeze
55
55
 
56
56
  Win1253_CharToOrderMap = [
57
57
  255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
@@ -70,7 +70,7 @@ module CharDet
70
70
  35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, # d0
71
71
  124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, # e0
72
72
  9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, # f0
73
- ]
73
+ ].freeze
74
74
 
75
75
  # Model Table:
76
76
  # total sequences: 100%
@@ -207,7 +207,7 @@ module CharDet
207
207
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
208
208
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
209
209
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
210
- ]
210
+ ].freeze
211
211
 
212
212
  Latin7GreekModel = {
213
213
  'charToOrderMap' => Latin7_CharToOrderMap,
@@ -215,7 +215,7 @@ module CharDet
215
215
  'mTypicalPositiveRatio' => 0.982851,
216
216
  'keepEnglishLetter' => false,
217
217
  'charsetName' => "ISO-8859-7"
218
- }
218
+ }.freeze
219
219
 
220
220
  Win1253GreekModel = {
221
221
  'charToOrderMap' => Win1253_CharToOrderMap,
@@ -223,5 +223,5 @@ module CharDet
223
223
  'mTypicalPositiveRatio' => 0.982851,
224
224
  'keepEnglishLetter' => false,
225
225
  'charsetName' => "windows-1253"
226
- }
226
+ }.freeze
227
227
  end
@@ -53,7 +53,7 @@ Win1255_CharToOrderMap = [
53
53
  238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
54
54
  9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23,
55
55
  12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253,
56
- ]
56
+ ].freeze
57
57
 
58
58
  # Model Table:
59
59
  # total sequences: 100%
@@ -190,7 +190,7 @@ HebrewLangModel = [
190
190
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
191
191
  1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
192
192
  0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
193
- ]
193
+ ].freeze
194
194
 
195
195
  Win1255HebrewModel = {
196
196
  'charToOrderMap' => Win1255_CharToOrderMap,
@@ -198,5 +198,5 @@ Win1255HebrewModel = {
198
198
  'mTypicalPositiveRatio' => 0.984004,
199
199
  'keepEnglishLetter' => false,
200
200
  'charsetName' => "windows-1255"
201
- }
201
+ }.freeze
202
202
  end
@@ -50,7 +50,7 @@ Latin2_HungarianCharToOrderMap = [
50
50
  232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
51
51
  82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
52
52
  245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
53
- ]
53
+ ].freeze
54
54
 
55
55
  Win1250HungarianCharToOrderMap = [
56
56
  255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, # 00
@@ -69,7 +69,7 @@ Win1250HungarianCharToOrderMap = [
69
69
  232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
70
70
  84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
71
71
  245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
72
- ]
72
+ ].freeze
73
73
 
74
74
  # Model Table:
75
75
  # total sequences: 100%
@@ -206,7 +206,7 @@ HungarianLangModel = [
206
206
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
207
207
  1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
208
208
  0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
209
- ]
209
+ ].freeze
210
210
 
211
211
  Latin2HungarianModel = {
212
212
  'charToOrderMap' => Latin2_HungarianCharToOrderMap,
@@ -214,7 +214,7 @@ Latin2HungarianModel = {
214
214
  'mTypicalPositiveRatio' => 0.947368,
215
215
  'keepEnglishLetter' => true,
216
216
  'charsetName' => "ISO-8859-2"
217
- }
217
+ }.freeze
218
218
 
219
219
  Win1250HungarianModel = {
220
220
  'charToOrderMap' => Win1250HungarianCharToOrderMap,
@@ -222,5 +222,5 @@ Win1250HungarianModel = {
222
222
  'mTypicalPositiveRatio' => 0.947368,
223
223
  'keepEnglishLetter' => true,
224
224
  'charsetName' => "windows-1250"
225
- }
225
+ }.freeze
226
226
  end
@@ -52,7 +52,7 @@ TIS620CharToOrderMap = [
52
52
  22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
53
53
  11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
54
54
  68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
55
- ]
55
+ ].freeze
56
56
 
57
57
  # Model Table:
58
58
  # total sequences: 100%
@@ -189,7 +189,7 @@ ThaiLangModel = [
189
189
  0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
190
190
  2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
191
191
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
192
- ]
192
+ ].freeze
193
193
 
194
194
  TIS620ThaiModel = {
195
195
  'charToOrderMap' => TIS620CharToOrderMap,
@@ -197,5 +197,5 @@ TIS620ThaiModel = {
197
197
  'mTypicalPositiveRatio' => 0.926386,
198
198
  'keepEnglishLetter' => false,
199
199
  'charsetName' => "TIS-620"
200
- }
200
+ }.freeze
201
201
  end
@@ -73,7 +73,7 @@ module CharDet
73
73
  ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # E8 - EF
74
74
  ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, # F0 - F7
75
75
  ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, # F8 - FF
76
- ]
76
+ ].freeze
77
77
 
78
78
  # 0 : illegal
79
79
  # 1 : very unlikely
@@ -89,7 +89,7 @@ module CharDet
89
89
  0, 3, 3, 3, 3, 3, 3, 3, # ACO
90
90
  0, 3, 1, 3, 1, 1, 1, 3, # ASV
91
91
  0, 3, 1, 3, 1, 1, 3, 3, # ASO
92
- ]
92
+ ].freeze
93
93
 
94
94
  class Latin1Prober < CharSetProber
95
95
  def initialize
@@ -98,8 +98,8 @@ module CharDet
98
98
  end
99
99
 
100
100
  def reset
101
- @_mLastCharClass = OTH
102
- @_mFreqCounter = [0] * FREQ_CAT_NUM
101
+ @lastCharClass = OTH
102
+ @freqCounter = [0] * FREQ_CAT_NUM
103
103
  super
104
104
  end
105
105
 
@@ -110,15 +110,15 @@ module CharDet
110
110
  def feed(aBuf)
111
111
  aBuf = filter_with_english_letters(aBuf)
112
112
  aBuf.each_byte do |b|
113
- c = b.chr
114
- charClass = Latin1_CharToClass[c[0]]
115
- freq = Latin1ClassModel[(@_mLastCharClass * CLASS_NUM) + charClass]
116
- if freq == 0
117
- @_mState = ENotMe
118
- break
119
- end
120
- @_mFreqCounter[freq] += 1
121
- @_mLastCharClass = charClass
113
+ c = b.chr
114
+ charClass = Latin1_CharToClass[c.bytes.first]
115
+ freq = Latin1ClassModel[(@lastCharClass * CLASS_NUM) + charClass]
116
+ if freq == 0
117
+ @state = ENotMe
118
+ break
119
+ end
120
+ @freqCounter[freq] += 1
121
+ @lastCharClass = charClass
122
122
  end
123
123
 
124
124
  return get_state()
@@ -126,17 +126,17 @@ module CharDet
126
126
 
127
127
  def get_confidence
128
128
  if get_state() == ENotMe
129
- return 0.01
129
+ return 0.01
130
130
  end
131
131
 
132
- total = @_mFreqCounter.inject{|a,b| a+b}
132
+ total = @freqCounter.inject{|a,b| a+b}
133
133
  if total < 0.01
134
- confidence = 0.0
134
+ confidence = 0.0
135
135
  else
136
- confidence = (@_mFreqCounter[3] / total) - (@_mFreqCounter[1] * 20.0 / total)
136
+ confidence = (@freqCounter[3] / total) - (@freqCounter[1] * 20.0 / total)
137
137
  end
138
138
  if confidence < 0.0
139
- confidence = 0.0
139
+ confidence = 0.0
140
140
  end
141
141
  # lower the confidence of latin1 so that other more accurate detector
142
142
  # can take priority.
@@ -32,20 +32,20 @@ module CharDet
32
32
  class MultiByteCharSetProber < CharSetProber
33
33
  def initialize
34
34
  super
35
- @_mDistributionAnalyzer = nil
36
- @_mCodingSM = nil
37
- @_mLastChar = "\x00\x00"
35
+ @distributionAnalyzer = nil
36
+ @codingSM = nil
37
+ @lastChar = "\x00\x00"
38
38
  end
39
39
 
40
40
  def reset
41
41
  super
42
- if @_mCodingSM
43
- @_mCodingSM.reset()
42
+ if @codingSM
43
+ @codingSM.reset()
44
44
  end
45
- if @_mDistributionAnalyzer
46
- @_mDistributionAnalyzer.reset()
45
+ if @distributionAnalyzer
46
+ @distributionAnalyzer.reset()
47
47
  end
48
- @_mLastChar = "\x00\x00"
48
+ @lastChar = "\x00\x00"
49
49
  end
50
50
 
51
51
  def get_charset_name
@@ -54,36 +54,36 @@ module CharDet
54
54
  def feed(aBuf)
55
55
  aLen = aBuf.length
56
56
  for i in (0...aLen)
57
- codingState = @_mCodingSM.next_state(aBuf[i..i])
58
- if codingState == EError
59
- $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
60
- @_mState = ENotMe
61
- break
62
- elsif codingState == EItsMe
63
- @_mState = EFoundIt
64
- break
65
- elsif codingState == EStart
66
- charLen = @_mCodingSM.get_current_charlen()
67
- if i == 0
68
- @_mLastChar[1] = aBuf[0..0]
69
- @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
70
- else
71
- @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
72
- end
73
- end
57
+ codingState = @codingSM.next_state(aBuf[i, 1])
58
+ if codingState == EError
59
+ $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
60
+ @state = ENotMe
61
+ break
62
+ elsif codingState == EItsMe
63
+ @state = EFoundIt
64
+ break
65
+ elsif codingState == EStart
66
+ charLen = @codingSM.get_current_charlen()
67
+ if i == 0
68
+ @lastChar[1] = aBuf[0, 1]
69
+ @distributionAnalyzer.feed(@lastChar, charLen)
70
+ else
71
+ @distributionAnalyzer.feed(aBuf[i-1, 2], charLen)
72
+ end
73
+ end
74
74
  end
75
- @_mLastChar[0] = aBuf[aLen-1..aLen-1]
75
+ @lastChar[0] = aBuf[aLen-1, 1]
76
76
 
77
77
  if get_state() == EDetecting
78
- if @_mDistributionAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
79
- @_mState = EFoundIt
80
- end
78
+ if @distributionAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
79
+ @state = EFoundIt
80
+ end
81
81
  end
82
82
  return get_state()
83
83
  end
84
84
 
85
85
  def get_confidence
86
- return @_mDistributionAnalyzer.get_confidence()
86
+ return @distributionAnalyzer.get_confidence()
87
87
  end
88
88
  end
89
89
  end
@@ -32,15 +32,15 @@ module CharDet
32
32
  class MBCSGroupProber < CharSetGroupProber
33
33
  def initialize
34
34
  super
35
- @_mProbers = [
36
- UTF8Prober.new,
37
- SJISProber.new,
38
- EUCJPProber.new,
39
- GB2312Prober.new,
40
- EUCKRProber.new,
41
- Big5Prober.new,
42
- EUCTWProber.new
43
- ]
35
+ @probers = [
36
+ UTF8Prober.new,
37
+ SJISProber.new,
38
+ EUCJPProber.new,
39
+ GB2312Prober.new,
40
+ EUCKRProber.new,
41
+ Big5Prober.new,
42
+ EUCTWProber.new
43
+ ]
44
44
  reset()
45
45
  end
46
46
  end
@@ -62,22 +62,22 @@ module CharDet
62
62
  3,3,3,3,3,3,3,3, # e8 - ef
63
63
  3,3,3,3,3,3,3,3, # f0 - f7
64
64
  3,3,3,3,3,3,3,0 # f8 - ff
65
- ]
65
+ ].freeze
66
66
 
67
67
  BIG5_st = [
68
68
  EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
69
69
  EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,EItsMe,EError,#08-0f
70
70
  EError,EStart,EStart,EStart,EStart,EStart,EStart,EStart #10-17
71
- ]
71
+ ].freeze
72
72
 
73
- Big5CharLenTable = [0, 1, 1, 2, 0]
73
+ Big5CharLenTable = [0, 1, 1, 2, 0].freeze
74
74
 
75
75
  Big5SMModel = {'classTable' => BIG5_cls,
76
- 'classFactor' => 5,
77
- 'stateTable' => BIG5_st,
78
- 'charLenTable' => Big5CharLenTable,
79
- 'name' => 'Big5'
80
- }
76
+ 'classFactor' => 5,
77
+ 'stateTable' => BIG5_st,
78
+ 'charLenTable' => Big5CharLenTable,
79
+ 'name' => 'Big5'
80
+ }.freeze
81
81
 
82
82
  # EUC-JP
83
83
 
@@ -114,7 +114,7 @@ module CharDet
114
114
  0,0,0,0,0,0,0,0, # e8 - ef
115
115
  0,0,0,0,0,0,0,0, # f0 - f7
116
116
  0,0,0,0,0,0,0,5 # f8 - ff
117
- ]
117
+ ].freeze
118
118
 
119
119
  EUCJP_st = [
120
120
  3, 4, 3, 5,EStart,EError,EError,EError,#00-07
@@ -122,16 +122,16 @@ module CharDet
122
122
  EItsMe,EItsMe,EStart,EError,EStart,EError,EError,EError,#10-17
123
123
  EError,EError,EStart,EError,EError,EError, 3,EError,#18-1f
124
124
  3,EError,EError,EError,EStart,EStart,EStart,EStart #20-27
125
- ]
125
+ ].freeze
126
126
 
127
- EUCJPCharLenTable = [2, 2, 2, 3, 1, 0]
127
+ EUCJPCharLenTable = [2, 2, 2, 3, 1, 0].freeze
128
128
 
129
129
  EUCJPSMModel = {'classTable' => EUCJP_cls,
130
- 'classFactor' => 6,
131
- 'stateTable' => EUCJP_st,
132
- 'charLenTable' => EUCJPCharLenTable,
133
- 'name' => 'EUC-JP'
134
- }
130
+ 'classFactor' => 6,
131
+ 'stateTable' => EUCJP_st,
132
+ 'charLenTable' => EUCJPCharLenTable,
133
+ 'name' => 'EUC-JP'
134
+ }.freeze
135
135
 
136
136
  # EUC-KR
137
137
 
@@ -168,21 +168,21 @@ module CharDet
168
168
  2,2,2,2,2,2,2,2, # e8 - ef
169
169
  2,2,2,2,2,2,2,2, # f0 - f7
170
170
  2,2,2,2,2,2,2,0 # f8 - ff
171
- ]
171
+ ].freeze
172
172
 
173
173
  EUCKR_st = [
174
174
  EError,EStart, 3,EError,EError,EError,EError,EError,#00-07
175
175
  EItsMe,EItsMe,EItsMe,EItsMe,EError,EError,EStart,EStart#08-0f
176
- ]
176
+ ].freeze
177
177
 
178
- EUCKRCharLenTable = [0, 1, 2, 0]
178
+ EUCKRCharLenTable = [0, 1, 2, 0].freeze
179
179
 
180
180
  EUCKRSMModel = {'classTable' => EUCKR_cls,
181
- 'classFactor' => 4,
182
- 'stateTable' => EUCKR_st,
183
- 'charLenTable' => EUCKRCharLenTable,
184
- 'name' => 'EUC-KR'
185
- }
181
+ 'classFactor' => 4,
182
+ 'stateTable' => EUCKR_st,
183
+ 'charLenTable' => EUCKRCharLenTable,
184
+ 'name' => 'EUC-KR'
185
+ }.freeze
186
186
 
187
187
  # EUC-TW
188
188
 
@@ -219,7 +219,7 @@ module CharDet
219
219
  3,3,3,3,3,3,3,3, # e8 - ef
220
220
  3,3,3,3,3,3,3,3, # f0 - f7
221
221
  3,3,3,3,3,3,3,0 # f8 - ff
222
- ]
222
+ ].freeze
223
223
 
224
224
  EUCTW_st = [
225
225
  EError,EError,EStart, 3, 3, 3, 4,EError,#00-07
@@ -228,16 +228,16 @@ module CharDet
228
228
  EStart,EStart,EStart,EError,EError,EError,EError,EError,#18-1f
229
229
  5,EError,EError,EError,EStart,EError,EStart,EStart,#20-27
230
230
  EStart,EError,EStart,EStart,EStart,EStart,EStart,EStart #28-2f
231
- ]
231
+ ].freeze
232
232
 
233
- EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3]
233
+ EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3].freeze
234
234
 
235
235
  EUCTWSMModel = {'classTable' => EUCTW_cls,
236
- 'classFactor' => 7,
237
- 'stateTable' => EUCTW_st,
238
- 'charLenTable' => EUCTWCharLenTable,
239
- 'name' => 'x-euc-tw'
240
- }
236
+ 'classFactor' => 7,
237
+ 'stateTable' => EUCTW_st,
238
+ 'charLenTable' => EUCTWCharLenTable,
239
+ 'name' => 'x-euc-tw'
240
+ }.freeze
241
241
 
242
242
  # GB2312
243
243
 
@@ -274,7 +274,7 @@ module CharDet
274
274
  6,6,6,6,6,6,6,6, # e8 - ef
275
275
  6,6,6,6,6,6,6,6, # f0 - f7
276
276
  6,6,6,6,6,6,6,0 # f8 - ff
277
- ]
277
+ ].freeze
278
278
 
279
279
  GB2312_st = [
280
280
  EError,EStart,EStart,EStart,EStart,EStart, 3,EError,#00-07
@@ -283,21 +283,21 @@ module CharDet
283
283
  4,EError,EStart,EStart,EError,EError,EError,EError,#18-1f
284
284
  EError,EError, 5,EError,EError,EError,EItsMe,EError,#20-27
285
285
  EError,EError,EStart,EStart,EStart,EStart,EStart,EStart#28-2f
286
- ]
286
+ ].freeze
287
287
 
288
288
  # To be accurate, the length of class 6 can be either 2 or 4.
289
289
  # But it is not necessary to discriminate between the two since
290
290
  # it is used for frequency analysis only, and we are validating
291
291
  # each code range there as well. So it is safe to set it to be
292
292
  # 2 here.
293
- GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2]
293
+ GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2].freeze
294
294
 
295
295
  GB2312SMModel = {'classTable' => GB2312_cls,
296
- 'classFactor' => 7,
297
- 'stateTable' => GB2312_st,
298
- 'charLenTable' => GB2312CharLenTable,
299
- 'name' => 'GB2312'
300
- }
296
+ 'classFactor' => 7,
297
+ 'stateTable' => GB2312_st,
298
+ 'charLenTable' => GB2312CharLenTable,
299
+ 'name' => 'GB2312'
300
+ }.freeze
301
301
 
302
302
  # Shift_JIS
303
303
 
@@ -336,22 +336,22 @@ module CharDet
336
336
  3,3,3,3,3,4,4,4, # e8 - ef
337
337
  4,4,4,4,4,4,4,4, # f0 - f7
338
338
  4,4,4,4,4,0,0,0 # f8 - ff
339
- ]
339
+ ].freeze
340
340
 
341
341
  SJIS_st = [
342
342
  EError,EStart,EStart, 3,EError,EError,EError,EError,#00-07
343
343
  EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,#08-0f
344
344
  EItsMe,EItsMe,EError,EError,EStart,EStart,EStart,EStart#10-17
345
- ]
345
+ ].freeze
346
346
 
347
- SJISCharLenTable = [0, 1, 1, 2, 0, 0]
347
+ SJISCharLenTable = [0, 1, 1, 2, 0, 0].freeze
348
348
 
349
349
  SJISSMModel = {'classTable' => SJIS_cls,
350
- 'classFactor' => 6,
351
- 'stateTable' => SJIS_st,
352
- 'charLenTable' => SJISCharLenTable,
353
- 'name' => 'Shift_JIS'
354
- }
350
+ 'classFactor' => 6,
351
+ 'stateTable' => SJIS_st,
352
+ 'charLenTable' => SJISCharLenTable,
353
+ 'name' => 'Shift_JIS'
354
+ }.freeze
355
355
 
356
356
  # UCS2-BE
357
357
 
@@ -388,7 +388,7 @@ module CharDet
388
388
  0,0,0,0,0,0,0,0, # e8 - ef
389
389
  0,0,0,0,0,0,0,0, # f0 - f7
390
390
  0,0,0,0,0,0,4,5 # f8 - ff
391
- ]
391
+ ].freeze
392
392
 
393
393
  UCS2BE_st = [
394
394
  5, 7, 7,EError, 4, 3,EError,EError,#00-07
@@ -398,16 +398,16 @@ module CharDet
398
398
  6, 6, 6, 6, 5, 7, 7,EError,#20-27
399
399
  5, 8, 6, 6,EError, 6, 6, 6,#28-2f
400
400
  6, 6, 6, 6,EError,EError,EStart,EStart#30-37
401
- ]
401
+ ].freeze
402
402
 
403
- UCS2BECharLenTable = [2, 2, 2, 0, 2, 2]
403
+ UCS2BECharLenTable = [2, 2, 2, 0, 2, 2].freeze
404
404
 
405
405
  UCS2BESMModel = {'classTable' => UCS2BE_cls,
406
- 'classFactor' => 6,
407
- 'stateTable' => UCS2BE_st,
408
- 'charLenTable' => UCS2BECharLenTable,
409
- 'name' => 'UTF-16BE'
410
- }
406
+ 'classFactor' => 6,
407
+ 'stateTable' => UCS2BE_st,
408
+ 'charLenTable' => UCS2BECharLenTable,
409
+ 'name' => 'UTF-16BE'
410
+ }.freeze
411
411
 
412
412
  # UCS2-LE
413
413
 
@@ -444,7 +444,7 @@ module CharDet
444
444
  0,0,0,0,0,0,0,0, # e8 - ef
445
445
  0,0,0,0,0,0,0,0, # f0 - f7
446
446
  0,0,0,0,0,0,4,5 # f8 - ff
447
- ]
447
+ ].freeze
448
448
 
449
449
  UCS2LE_st = [
450
450
  6, 6, 7, 6, 4, 3,EError,EError,#00-07
@@ -454,16 +454,16 @@ module CharDet
454
454
  7, 6, 8, 8, 5, 5, 5,EError,#20-27
455
455
  5, 5, 5,EError,EError,EError, 5, 5,#28-2f
456
456
  5, 5, 5,EError, 5,EError,EStart,EStart#30-37
457
- ]
457
+ ].freeze
458
458
 
459
- UCS2LECharLenTable = [2, 2, 2, 2, 2, 2]
459
+ UCS2LECharLenTable = [2, 2, 2, 2, 2, 2].freeze
460
460
 
461
461
  UCS2LESMModel = {'classTable' => UCS2LE_cls,
462
- 'classFactor' => 6,
463
- 'stateTable' => UCS2LE_st,
464
- 'charLenTable' => UCS2LECharLenTable,
465
- 'name' => 'UTF-16LE'
466
- }
462
+ 'classFactor' => 6,
463
+ 'stateTable' => UCS2LE_st,
464
+ 'charLenTable' => UCS2LECharLenTable,
465
+ 'name' => 'UTF-16LE'
466
+ }.freeze
467
467
 
468
468
  # UTF-8
469
469
 
@@ -500,7 +500,7 @@ module CharDet
500
500
  8,8,8,8,8,9,8,8, # e8 - ef
501
501
  10,11,11,11,11,11,11,11, # f0 - f7
502
502
  12,13,13,13,14,15,0,0 # f8 - ff
503
- ]
503
+ ].freeze
504
504
 
505
505
  UTF8_st = [
506
506
  EError,EStart,EError,EError,EError,EError, 12, 10,#00-07
@@ -529,14 +529,14 @@ module CharDet
529
529
  EError,EError,EError,EError,EError,EError,EError,EError,#b8-bf
530
530
  EError,EError,EStart,EStart,EStart,EStart,EError,EError,#c0-c7
531
531
  EError,EError,EError,EError,EError,EError,EError,EError#c8-cf
532
- ]
532
+ ].freeze
533
533
 
534
- UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6]
534
+ UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6].freeze
535
535
 
536
536
  UTF8SMModel = {'classTable' => UTF8_cls,
537
- 'classFactor' => 16,
538
- 'stateTable' => UTF8_st,
539
- 'charLenTable' => UTF8CharLenTable,
540
- 'name' => 'UTF-8'
541
- }
537
+ 'classFactor' => 16,
538
+ 'stateTable' => UTF8_st,
539
+ 'charLenTable' => UTF8CharLenTable,
540
+ 'name' => 'UTF-8'
541
+ }.freeze
542
542
  end