tmail 1.2.7 → 1.2.7.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -31,21 +31,19 @@ module CharDet
31
31
  class SBCSGroupProber < CharSetGroupProber
32
32
  def initialize
33
33
  super
34
- @_mProbers = [
35
- SingleByteCharSetProber.new(Win1251CyrillicModel),
36
- SingleByteCharSetProber.new(Koi8rModel),
37
- SingleByteCharSetProber.new(Latin5CyrillicModel),
38
- SingleByteCharSetProber.new(MacCyrillicModel),
39
- SingleByteCharSetProber.new(Ibm866Model),
40
- SingleByteCharSetProber.new(Ibm855Model),
41
- SingleByteCharSetProber.new(Latin7GreekModel),
42
- SingleByteCharSetProber.new(Win1253GreekModel),
43
- SingleByteCharSetProber.new(Latin5BulgarianModel),
44
- SingleByteCharSetProber.new(Win1251BulgarianModel),
45
- SingleByteCharSetProber.new(Latin2HungarianModel),
46
- SingleByteCharSetProber.new(Win1250HungarianModel),
47
- SingleByteCharSetProber.new(TIS620ThaiModel),
48
- ]
34
+ @_mProbers = [ SingleByteCharSetProber.new(Win1251CyrillicModel),
35
+ SingleByteCharSetProber.new(Koi8rModel),
36
+ SingleByteCharSetProber.new(Latin5CyrillicModel),
37
+ SingleByteCharSetProber.new(MacCyrillicModel),
38
+ SingleByteCharSetProber.new(Ibm866Model),
39
+ SingleByteCharSetProber.new(Ibm855Model),
40
+ SingleByteCharSetProber.new(Latin7GreekModel),
41
+ SingleByteCharSetProber.new(Win1253GreekModel),
42
+ SingleByteCharSetProber.new(Latin5BulgarianModel),
43
+ SingleByteCharSetProber.new(Win1251BulgarianModel),
44
+ SingleByteCharSetProber.new(Latin2HungarianModel),
45
+ SingleByteCharSetProber.new(Win1250HungarianModel),
46
+ SingleByteCharSetProber.new(TIS620ThaiModel) ]
49
47
  hebrewProber = HebrewProber.new()
50
48
  logicalHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, false, hebrewProber)
51
49
  visualHebrewProber = SingleByteCharSetProber.new(Win1255HebrewModel, true, hebrewProber)
@@ -48,33 +48,33 @@ module CharDet
48
48
  def feed(aBuf)
49
49
  aLen = aBuf.length
50
50
  for i in (0...aLen)
51
- codingState = @_mCodingSM.next_state(aBuf[i..i])
52
- if codingState == EError
53
- $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
54
- @_mState = ENotMe
55
- break
56
- elsif codingState == EItsMe
57
- @_mState = EFoundIt
58
- break
59
- elsif codingState == EStart
60
- charLen = @_mCodingSM.get_current_charlen()
61
- if i == 0
62
- @_mLastChar[1] = aBuf[0..0]
63
- @_mContextAnalyzer.feed(@_mLastChar[2 - charLen..-1], charLen)
64
- @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
65
- else
66
- @_mContextAnalyzer.feed(aBuf[i + 1 - charLen ... i + 3 - charLen], charLen)
67
- @_mDistributionAnalyzer.feed(aBuf[i - 1 ... i + 1], charLen)
68
- end
69
- end
51
+ codingState = @_mCodingSM.next_state(aBuf[i..i])
52
+ if codingState == EError
53
+ $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
54
+ @_mState = ENotMe
55
+ break
56
+ elsif codingState == EItsMe
57
+ @_mState = EFoundIt
58
+ break
59
+ elsif codingState == EStart
60
+ charLen = @_mCodingSM.get_current_charlen()
61
+ if i == 0
62
+ @_mLastChar[1] = aBuf[0..0]
63
+ @_mContextAnalyzer.feed(@_mLastChar[2 - charLen..-1], charLen)
64
+ @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
65
+ else
66
+ @_mContextAnalyzer.feed(aBuf[i + 1 - charLen ... i + 3 - charLen], charLen)
67
+ @_mDistributionAnalyzer.feed(aBuf[i - 1 ... i + 1], charLen)
68
+ end
69
+ end
70
70
  end
71
71
 
72
72
  @_mLastChar[0] = aBuf[aLen - 1.. aLen-1]
73
73
 
74
- if get_state() == EDetecting:
75
- if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
76
- @_mState = EFoundIt
77
- end
74
+ if get_state() == EDetecting
75
+ if @_mContextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
76
+ @_mState = EFoundIt
77
+ end
78
78
  end
79
79
 
80
80
  return get_state()
@@ -51,10 +51,10 @@ module CharDet
51
51
  @_mInputState = EPureAscii
52
52
  @_mLastChar = ''
53
53
  if @_mEscCharSetProber
54
- @_mEscCharSetProber.reset()
54
+ @_mEscCharSetProber.reset()
55
55
  end
56
56
  for prober in @_mCharSetProbers
57
- prober.reset()
57
+ prober.reset()
58
58
  end
59
59
  end
60
60
 
@@ -65,101 +65,102 @@ module CharDet
65
65
  return if not aLen
66
66
 
67
67
  if not @_mGotData
68
- # If the data starts with BOM, we know it is UTF
69
- if aBuf[0...3] == "\xEF\xBB\xBF"
70
- # EF BB BF UTF-8 with BOM
71
- @result = {'encoding' => "UTF-8", 'confidence' => 1.0}
72
- elsif aBuf[0...4] == "\xFF\xFE\x00\x00"
73
- # FF FE 00 00 UTF-32, little-endian BOM
74
- @result = {'encoding' => "UTF-32LE", 'confidence' => 1.0}
75
- elsif aBuf[0...4] == "\x00\x00\xFE\xFF"
76
- # 00 00 FE FF UTF-32, big-endian BOM
77
- @result = {'encoding' => "UTF-32BE", 'confidence' => 1.0}
78
- elsif aBuf[0...4] == "\xFE\xFF\x00\x00"
79
- # FE FF 00 00 UCS-4, unusual octet order BOM (3412)
80
- @result = {'encoding' => "X-ISO-10646-UCS-4-3412", 'confidence' => 1.0}
81
- elsif aBuf[0...4] == "\x00\x00\xFF\xFE"
82
- # 00 00 FF FE UCS-4, unusual octet order BOM (2143)
83
- @result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
84
- elsif aBuf[0...2] == "\xFF\xFE"
85
- # FF FE UTF-16, little endian BOM
86
- @result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
87
- elsif aBuf[0...2] == "\xFE\xFF"
88
- # FE FF UTF-16, big endian BOM
89
- @result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
90
- end
68
+ # If the data starts with BOM, we know it is UTF
69
+ if aBuf[0...3] == "\xEF\xBB\xBF"
70
+ # EF BB BF UTF-8 with BOM
71
+ @result = {'encoding' => "UTF-8", 'confidence' => 1.0}
72
+ elsif aBuf[0...4] == "\xFF\xFE\x00\x00"
73
+ # FF FE 00 00 UTF-32, little-endian BOM
74
+ @result = {'encoding' => "UTF-32LE", 'confidence' => 1.0}
75
+ elsif aBuf[0...4] == "\x00\x00\xFE\xFF"
76
+ # 00 00 FE FF UTF-32, big-endian BOM
77
+ @result = {'encoding' => "UTF-32BE", 'confidence' => 1.0}
78
+ elsif aBuf[0...4] == "\xFE\xFF\x00\x00"
79
+ # FE FF 00 00 UCS-4, unusual octet order BOM (3412)
80
+ @result = {'encoding' => "X-ISO-10646-UCS-4-3412", 'confidence' => 1.0}
81
+ elsif aBuf[0...4] == "\x00\x00\xFF\xFE"
82
+ # 00 00 FF FE UCS-4, unusual octet order BOM (2143)
83
+ @result = {'encoding' => "X-ISO-10646-UCS-4-2143", 'confidence' => 1.0}
84
+ elsif aBuf[0...2] == "\xFF\xFE"
85
+ # FF FE UTF-16, little endian BOM
86
+ @result = {'encoding' => "UTF-16LE", 'confidence' => 1.0}
87
+ elsif aBuf[0...2] == "\xFE\xFF"
88
+ # FE FF UTF-16, big endian BOM
89
+ @result = {'encoding' => "UTF-16BE", 'confidence' => 1.0}
90
+ end
91
91
  end
92
-
92
+
93
93
  @_mGotData = true
94
- if @result['encoding'] and (@result['confidence'] > 0.0):
95
- @done = true
96
- return
94
+ if @result['encoding'] and (@result['confidence'] > 0.0)
95
+ @done = true
96
+ return
97
97
  end
98
- if @_mInputState == EPureAscii:
99
- if @_highBitDetector =~ (aBuf)
100
- @_mInputState = EHighbyte
101
- elsif (@_mInputState == EPureAscii) and @_escDetector =~ (@_mLastChar + aBuf)
102
- @_mInputState = EEscAscii
103
- end
98
+
99
+ if @_mInputState == EPureAscii
100
+ if @_highBitDetector =~ (aBuf)
101
+ @_mInputState = EHighbyte
102
+ elsif (@_mInputState == EPureAscii) and @_escDetector =~ (@_mLastChar + aBuf)
103
+ @_mInputState = EEscAscii
104
+ end
104
105
  end
105
-
106
+
106
107
  @_mLastChar = aBuf[-1..-1]
107
108
  if @_mInputState == EEscAscii
108
- if not @_mEscCharSetProber
109
- @_mEscCharSetProber = EscCharSetProber.new()
110
- end
111
- if @_mEscCharSetProber.feed(aBuf) == EFoundIt
112
- @result = {'encoding' => self._mEscCharSetProber.get_charset_name(),
113
- 'confidence' => @_mEscCharSetProber.get_confidence()
114
- }
115
- @done = true
116
- end
109
+ if not @_mEscCharSetProber
110
+ @_mEscCharSetProber = EscCharSetProber.new()
111
+ end
112
+ if @_mEscCharSetProber.feed(aBuf) == EFoundIt
113
+ @result = {'encoding' => self._mEscCharSetProber.get_charset_name(),
114
+ 'confidence' => @_mEscCharSetProber.get_confidence()
115
+ }
116
+ @done = true
117
+ end
117
118
  elsif @_mInputState == EHighbyte
118
- if not @_mCharSetProbers or @_mCharSetProbers.empty?
119
- @_mCharSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
120
- end
121
- for prober in @_mCharSetProbers
122
- if prober.feed(aBuf) == EFoundIt
123
- @result = {'encoding' => prober.get_charset_name(),
124
- 'confidence' => prober.get_confidence()}
125
- @done = true
126
- break
127
- end
128
- end
119
+ if not @_mCharSetProbers or @_mCharSetProbers.empty?
120
+ @_mCharSetProbers = [MBCSGroupProber.new(), SBCSGroupProber.new(), Latin1Prober.new()]
121
+ end
122
+ for prober in @_mCharSetProbers
123
+ if prober.feed(aBuf) == EFoundIt
124
+ @result = {'encoding' => prober.get_charset_name(),
125
+ 'confidence' => prober.get_confidence()}
126
+ @done = true
127
+ break
128
+ end
129
+ end
129
130
  end
130
-
131
+
131
132
  end
132
-
133
+
133
134
  def close
134
135
  return if @done
135
136
  if not @_mGotData
136
- $stderr << "no data received!\n" if $debug
137
- return
137
+ $stderr << "no data received!\n" if $debug
138
+ return
138
139
  end
139
140
  @done = true
140
-
141
- if @_mInputState == EPureAscii:
142
- @result = {'encoding' => 'ascii', 'confidence' => 1.0}
143
- return @result
141
+
142
+ if @_mInputState == EPureAscii
143
+ @result = {'encoding' => 'ascii', 'confidence' => 1.0}
144
+ return @result
144
145
  end
145
-
146
- if @_mInputState == EHighbyte:
147
- confidences = {}
146
+
147
+ if @_mInputState == EHighbyte
148
+ confidences = {}
148
149
  @_mCharSetProbers.each{ |prober| confidences[prober] = prober.get_confidence }
149
- maxProber = @_mCharSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
150
- if maxProber and maxProber.get_confidence > MINIMUM_THRESHOLD
151
- @result = {'encoding' => maxProber.get_charset_name(),
152
- 'confidence' => maxProber.get_confidence()}
153
- return @result
154
- end
150
+ maxProber = @_mCharSetProbers.max{ |a,b| confidences[a] <=> confidences[b] }
151
+ if maxProber and maxProber.get_confidence > MINIMUM_THRESHOLD
152
+ @result = {'encoding' => maxProber.get_charset_name(),
153
+ 'confidence' => maxProber.get_confidence()}
154
+ return @result
155
+ end
155
156
  end
156
157
 
157
158
  if $debug
158
- $stderr << "no probers hit minimum threshhold\n" if $debug
159
- for prober in @_mCharSetProbers[0]._mProbers
160
- next if not prober
161
- $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n" if $debug
162
- end
159
+ $stderr << "no probers hit minimum threshhold\n" if $debug
160
+ for prober in @_mCharSetProbers[0]._mProbers
161
+ next if not prober
162
+ $stderr << "#{prober.get_charset_name} confidence = #{prober.get_confidence}\n" if $debug
163
+ end
163
164
  end
164
165
  end
165
166
  end
@@ -48,25 +48,25 @@ module CharDet
48
48
 
49
49
  def feed(aBuf)
50
50
  aBuf.each_byte do |b|
51
- c = b.chr
52
- codingState = @_mCodingSM.next_state(c)
53
- if codingState == EError
54
- @_mState = ENotMe
55
- break
56
- elsif codingState == EItsMe
57
- @_mState = EFoundIt
58
- break
59
- elsif codingState == EStart
60
- if @_mCodingSM.get_current_charlen() >= 2
61
- @_mNumOfMBChar += 1
62
- end
63
- end
51
+ c = b.chr
52
+ codingState = @_mCodingSM.next_state(c)
53
+ if codingState == EError
54
+ @_mState = ENotMe
55
+ break
56
+ elsif codingState == EItsMe
57
+ @_mState = EFoundIt
58
+ break
59
+ elsif codingState == EStart
60
+ if @_mCodingSM.get_current_charlen() >= 2
61
+ @_mNumOfMBChar += 1
62
+ end
63
+ end
64
64
  end
65
65
 
66
- if get_state() == EDetecting:
67
- if get_confidence() > SHORTCUT_THRESHOLD
68
- @_mState = EFoundIt
69
- end
66
+ if get_state() == EDetecting
67
+ if get_confidence() > SHORTCUT_THRESHOLD
68
+ @_mState = EFoundIt
69
+ end
70
70
  end
71
71
 
72
72
  return get_state()
@@ -75,12 +75,12 @@ module CharDet
75
75
  def get_confidence
76
76
  unlike = 0.99
77
77
  if @_mNumOfMBChar < 6
78
- for i in (0...@_mNumOfMBChar)
79
- unlike = unlike * ONE_CHAR_PROB
80
- end
81
- return 1.0 - unlike
78
+ for i in (0...@_mNumOfMBChar)
79
+ unlike = unlike * ONE_CHAR_PROB
80
+ end
81
+ return 1.0 - unlike
82
82
  else
83
- return unlike
83
+ return unlike
84
84
  end
85
85
  end
86
86
  end
@@ -33,7 +33,8 @@ module TMail
33
33
  MAJOR = 1
34
34
  MINOR = 2
35
35
  TINY = 7
36
+ MICRO = 1
36
37
 
37
- STRING = [MAJOR, MINOR, TINY].join('.')
38
+ STRING = [MAJOR, MINOR, TINY, MICRO].join('.')
38
39
  end
39
40
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{tmail}
5
- s.version = "1.2.7"
5
+ s.version = "1.2.7.1"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["Mikel Lindsaar <raasdnil AT gmail.com>"]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tmail
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.7
4
+ version: 1.2.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mikel Lindsaar <raasdnil AT gmail.com>