tmail 1.2.7 → 1.2.7.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -31,21 +31,19 @@ module CharDet
31
31
  class SBCSGroupProber < CharSetGroupProber
32
32
  def initialize
33
33
  super
34
- @_mProbers = [
35
- SingleByteCharSetProber.new(Win1251CyrillicModel),
36
- SingleByteCharSetProber.new(Koi8rModel),
37
- SingleByteCharSetProber.new(Latin5CyrillicModel),
38
- SingleByteCharSetProber.new(MacCyrillicModel),
39
- SingleByteCharSetProber.new(Ibm866Model),
40
- SingleByteCharSetProber.new(Ibm855Model),
41
- SingleByteCharSetProber.new(Latin7GreekModel),
42
- SingleByteCharSetProber.new(Win1253GreekModel),
43
- SingleByteCharSetProber.new(Latin5BulgarianModel),
44
- SingleByteCharSetProber.new(Win1251BulgarianModel),
45
- SingleByteCharSetProber.new(Latin2HungarianModel),
46
- SingleByteCharSetProber.new(Win1250HungarianModel),
47
- SingleByteCharSetProber.new(TIS620ThaiModel),
48
- ]
34
+ @_mProbers = [ SingleByteCharSetProber.new(Win1251CyrillicModel),
35
+ SingleByteCharSetProber.new(Koi8rModel),
36
+ SingleByteCharSetProber.new(Latin5CyrillicModel),
37
+ SingleByteCharSetProber.new(MacCyrillicModel),
38
+ SingleByteCharSetProber.new(Ibm866Model),
39
+ SingleByteCharSetProber.new(Ibm855Model),
40
+ SingleByteCharSetProber.new(Latin7GreekModel),
41
+ SingleByteCharSetProber.new(Win1253GreekModel),
42
+ SingleByteCharSetProber.new(Latin5BulgarianModel),
43
+ SingleByteCharSetProber.new(Win1251BulgarianModel),
44
+ SingleByteCharSetProber.new(Latin2HungarianModel),
45
+ SingleByteCharSetProber.new(Win1250HungarianModel),
46
+ SingleByteCharSetProber.new(TIS620ThaiModel) ]
49
47
  hebrewProber = HebrewProber.new()
50
48
  logicalHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, false, hebrewProber)
51
49
  visualHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, true, hebrewProber)
@@ -48,33 +48,33 @@ module CharDet
48
48
  def feed(aBuf)
49
49
  aLen = aBuf.length
50
50
  for i in (0...aLen)
51
- codingState = @_mCodingSM.next_state(aBuf[i..i])
52
- if codingState == EError
53
- $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
54
- @_mState = ENotMe
55
- break
56
- elsif codingState == EItsMe
57
- @_mState = EFoundIt
58
- break
59
- elsif codingState == EStart
60
- charLen = @_mCodingSM.get_current_charlen()
61
- if i == 0
62
- @_mLastChar[1] = aBuf[0..0]
63
- @_mContextAnalyzer.feed(@_mLastChar[2 - charLen..-1], charLen)
64
- @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
65
- else
66
- @_mContextAnalyzer.feed(aBuf[i + 1 - charLen ... i + 3 - charLen], charLen)
67
- @_mDistributionAnalyzer.feed(aBuf[i - 1 ... i + 1], charLen)
68
- end
69
- end
51
+ codingState = @_mCodingSM.next_state(aBuf[i..i])
52
+ if codingState == EError
53
+ $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
54
+ @_mState = ENotMe
55
+ break
56
+ elsif codingState == EItsMe
57
+ @_mState = EFoundIt
58
+ break
59
+ elsif codingState == EStart
60
+ charLen = @_mCodingSM.get_current_charlen()
61
+ if i == 0
62
+ @_mLastChar[1] = aBuf[0..0]
63
+ @_mContextAnalyzer.feed(@_mLastChar[2 - charLen..-1], charLen)
64
+ @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
65
+ else
66
+ @_mContextAnalyzer.feed(aBuf[i + 1 - charLen ... i + 3 - charLen], charLen)
67
+ @_mDistributionAnalyzer.feed(aBuf[i - 1 ... i + 1], charLen)
68
+ end
69
+ end
70
70
  end
71
71
 
72
72
  @_mLastChar[0] = aBuf[aLen - 1.. aLen-1]
73
73
 
74
- if get_state() == EDetecting:
75
- if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
76
- @_mState = EFoundIt
77
- end
74
+ if get_state() == EDetecting
75
+ if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
76
+ @_mState = EFoundIt
77
+ end
78
78
  end
79
79
 
80
80
  return get_state()
@@ -51,10 +51,10 @@ module CharDet
51
51
  @_mInputState = EPureAscii
52
52
  @_mLastChar = ''
53
53
  if @_mEscCharSetProber
54
- @_mEscCharSetProber.reset()
54
+ @_mEscCharSetProber.reset()
55
55
  end
56
56
  for prober in @_mCharSetProbers
57
- prober.reset()
57
+ prober.reset()
58
58
  end
59
59
  end
60
60
 
@@ -65,101 +65,102 @@ module CharDet
65
65
  return if not aLen
66
66
 
67
67
  if not @_mGotData
68
- # If the data starts with BOM, we know it is UTF
69
- if aBuf[0...3] == "\xEF\xBB\xBF"
70
- # EF BB BF UTF-8 with BOM
71
- @result = {'encoding' => "UTF-8", 'confidence' => 1.0}
72
- elsif aBuf[0...4] == "\xFF\xFE\x00\x00"
73
- # FF FE 00 00 UTF-32, little-endian BOM
74
- @result = {'encoding' => "UTF-32LE", 'confidence' => 1.0}
75
- elsif aBuf[0...4] == "\x00\x00\xFE\xFF"
76
- # 00 00 FE FF UTF-32, big-endian BOM
77
- @result = {'encoding' => "UTF-32BE", 'confidence' => 1.0}
78
- elsif aBuf[0...4] == "\xFE\xFF\x00\x00"
79
- # FE FF 00 00 UCS-4, unusual octet order BOM (3412)
80
- @result = {'encoding' => "X-ISO-10646-UCS-4-3412", 'confidence' => 1.0}
81
- elsif aBuf[0...4] == "\x00\x00\xFF\xFE"
82
- # 00 00 FF FE UCS-4, unusual octet order BOM (2143)
83
- @result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
84
- elsif aBuf[0...2] == "\xFF\xFE"
85
- # FF FE UTF-16, little endian BOM
86
- @result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
87
- elsif aBuf[0...2] == "\xFE\xFF"
88
- # FE FF UTF-16, big endian BOM
89
- @result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
90
- end
68
+ # If the data starts with BOM, we know it is UTF
69
+ if aBuf[0...3] == "\xEF\xBB\xBF"
70
+ # EF BB BF UTF-8 with BOM
71
+ @result = {'encoding' => "UTF-8", 'confidence' => 1.0}
72
+ elsif aBuf[0...4] == "\xFF\xFE\x00\x00"
73
+ # FF FE 00 00 UTF-32, little-endian BOM
74
+ @result = {'encoding' => "UTF-32LE", 'confidence' => 1.0}
75
+ elsif aBuf[0...4] == "\x00\x00\xFE\xFF"
76
+ # 00 00 FE FF UTF-32, big-endian BOM
77
+ @result = {'encoding' => "UTF-32BE", 'confidence' => 1.0}
78
+ elsif aBuf[0...4] == "\xFE\xFF\x00\x00"
79
+ # FE FF 00 00 UCS-4, unusual octet order BOM (3412)
80
+ @result = {'encoding' => "X-ISO-10646-UCS-4-3412", 'confidence' => 1.0}
81
+ elsif aBuf[0...4] == "\x00\x00\xFF\xFE"
82
+ # 00 00 FF FE UCS-4, unusual octet order BOM (2143)
83
+ @result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
84
+ elsif aBuf[0...2] == "\xFF\xFE"
85
+ # FF FE UTF-16, little endian BOM
86
+ @result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
87
+ elsif aBuf[0...2] == "\xFE\xFF"
88
+ # FE FF UTF-16, big endian BOM
89
+ @result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
90
+ end
91
91
  end
92
-
92
+
93
93
  @_mGotData = true
94
- if @result['encoding'] and (@result['confidence'] > 0.0):
95
- @done = true
96
- return
94
+ if @result['encoding'] and (@result['confidence'] > 0.0)
95
+ @done = true
96
+ return
97
97
  end
98
- if @_mInputState == EPureAscii:
99
- if @_highBitDetector =~ (aBuf)
100
- @_mInputState = EHighbyte
101
- elsif (@_mInputState == EPureAscii) and @_escDetector =~ (@_mLastChar + aBuf)
102
- @_mInputState = EEscAscii
103
- end
98
+
99
+ if @_mInputState == EPureAscii
100
+ if @_highBitDetector =~ (aBuf)
101
+ @_mInputState = EHighbyte
102
+ elsif (@_mInputState == EPureAscii) and @_escDetector =~ (@_mLastChar + aBuf)
103
+ @_mInputState = EEscAscii
104
+ end
104
105
  end
105
-
106
+
106
107
  @_mLastChar = aBuf[-1..-1]
107
108
  if @_mInputState == EEscAscii
108
- if not @_mEscCharSetProber
109
- @_mEscCharSetProber = EscCharSetProber.new()
110
- end
111
- if @_mEscCharSetProber.feed(aBuf) == EFoundIt
112
- @result = {'encoding' => self._mEscCharSetProber.get_charset_name(),
113
- 'confidence' => @_mEscCharSetProber.get_confidence()
114
- }
115
- @done = true
116
- end
109
+ if not @_mEscCharSetProber
110
+ @_mEscCharSetProber = EscCharSetProber.new()
111
+ end
112
+ if @_mEscCharSetProber.feed(aBuf) == EFoundIt
113
+ @result = {'encoding' => self._mEscCharSetProber.get_charset_name(),
114
+ 'confidence' => @_mEscCharSetProber.get_confidence()
115
+ }
116
+ @done = true
117
+ end
117
118
  elsif @_mInputState == EHighbyte
118
- if not @_mCharSetProbers or @_mCharSetProbers.empty?
119
- @_mCharSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
120
- end
121
- for prober in @_mCharSetProbers
122
- if prober.feed(aBuf) == EFoundIt
123
- @result = {'encoding' => prober.get_charset_name(),
124
- 'confidence' => prober.get_confidence()}
125
- @done = true
126
- break
127
- end
128
- end
119
+ if not @_mCharSetProbers or @_mCharSetProbers.empty?
120
+ @_mCharSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
121
+ end
122
+ for prober in @_mCharSetProbers
123
+ if prober.feed(aBuf) == EFoundIt
124
+ @result = {'encoding' => prober.get_charset_name(),
125
+ 'confidence' => prober.get_confidence()}
126
+ @done = true
127
+ break
128
+ end
129
+ end
129
130
  end
130
-
131
+
131
132
  end
132
-
133
+
133
134
  def close
134
135
  return if @done
135
136
  if not @_mGotData
136
- $stderr << "no data received!\n" if $debug
137
- return
137
+ $stderr << "no data received!\n" if $debug
138
+ return
138
139
  end
139
140
  @done = true
140
-
141
- if @_mInputState == EPureAscii:
142
- @result = {'encoding' => 'ascii', 'confidence' => 1.0}
143
- return @result
141
+
142
+ if @_mInputState == EPureAscii
143
+ @result = {'encoding' => 'ascii', 'confidence' => 1.0}
144
+ return @result
144
145
  end
145
-
146
- if @_mInputState == EHighbyte:
147
- confidences = {}
146
+
147
+ if @_mInputState == EHighbyte
148
+ confidences = {}
148
149
  @_mCharSetProbers.each{ |prober| confidences[prober] = prober.get_confidence }
149
- maxProber = @_mCharSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
150
- if maxProber and maxProber.get_confidence > MINIMUM_THRESHOLD
151
- @result = {'encoding' => maxProber.get_charset_name(),
152
- 'confidence' => maxProber.get_confidence()}
153
- return @result
154
- end
150
+ maxProber = @_mCharSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
151
+ if maxProber and maxProber.get_confidence > MINIMUM_THRESHOLD
152
+ @result = {'encoding' => maxProber.get_charset_name(),
153
+ 'confidence' => maxProber.get_confidence()}
154
+ return @result
155
+ end
155
156
  end
156
157
 
157
158
  if $debug
158
- $stderr << "no probers hit minimum threshhold\n" if $debug
159
- for prober in @_mCharSetProbers[0]._mProbers
160
- next if not prober
161
- $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n" if $debug
162
- end
159
+ $stderr << "no probers hit minimum threshhold\n" if $debug
160
+ for prober in @_mCharSetProbers[0]._mProbers
161
+ next if not prober
162
+ $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n" if $debug
163
+ end
163
164
  end
164
165
  end
165
166
  end
@@ -48,25 +48,25 @@ module CharDet
48
48
 
49
49
  def feed(aBuf)
50
50
  aBuf.each_byte do |b|
51
- c = b.chr
52
- codingState = @_mCodingSM.next_state(c)
53
- if codingState == EError
54
- @_mState = ENotMe
55
- break
56
- elsif codingState == EItsMe
57
- @_mState = EFoundIt
58
- break
59
- elsif codingState == EStart
60
- if @_mCodingSM.get_current_charlen() >= 2
61
- @_mNumOfMBChar += 1
62
- end
63
- end
51
+ c = b.chr
52
+ codingState = @_mCodingSM.next_state(c)
53
+ if codingState == EError
54
+ @_mState = ENotMe
55
+ break
56
+ elsif codingState == EItsMe
57
+ @_mState = EFoundIt
58
+ break
59
+ elsif codingState == EStart
60
+ if @_mCodingSM.get_current_charlen() >= 2
61
+ @_mNumOfMBChar += 1
62
+ end
63
+ end
64
64
  end
65
65
 
66
- if get_state() == EDetecting:
67
- if get_confidence() > SHORTCUT_THRESHOLD
68
- @_mState = EFoundIt
69
- end
66
+ if get_state() == EDetecting
67
+ if get_confidence() > SHORTCUT_THRESHOLD
68
+ @_mState = EFoundIt
69
+ end
70
70
  end
71
71
 
72
72
  return get_state()
@@ -75,12 +75,12 @@ module CharDet
75
75
  def get_confidence
76
76
  unlike = 0.99
77
77
  if @_mNumOfMBChar < 6
78
- for i in (0...@_mNumOfMBChar)
79
- unlike = unlike * ONE_CHAR_PROB
80
- end
81
- return 1.0 - unlike
78
+ for i in (0...@_mNumOfMBChar)
79
+ unlike = unlike * ONE_CHAR_PROB
80
+ end
81
+ return 1.0 - unlike
82
82
  else
83
- return unlike
83
+ return unlike
84
84
  end
85
85
  end
86
86
  end
@@ -33,7 +33,8 @@ module TMail
33
33
  MAJOR = 1
34
34
  MINOR = 2
35
35
  TINY = 7
36
+ MICRO = 1
36
37
 
37
- STRING = [MAJOR, MINOR, TINY].join('.')
38
+ STRING = [MAJOR, MINOR, TINY, MICRO].join('.')
38
39
  end
39
40
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{tmail}
5
- s.version = "1.2.7"
5
+ s.version = "1.2.7.1"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Mikel Lindsaar <raasdnil AT gmail.com>"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tmail
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.7
4
+ version: 1.2.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mikel Lindsaar <raasdnil AT gmail.com>