RubyGems - tmail - Versions diffs - 1.2.7 → 1.2.7.1 - Mend

tmail 1.2.7 → 1.2.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/jpcntx.rb CHANGED

@@ -144,22 +144,22 @@ module CharDet
       # this character will simply our logic and improve performance.
       i = @_mNeedToSkipCharNum
       while i < aLen
-	order, charLen = get_order(aBuf[i...i+2])
-	i += charLen
-	if i > aLen
-	  @_mNeedToSkipCharNum = i - aLen
-	  @_mLastCharOrder = -1
-	else
-	  if (order != -1) and (@_mLastCharOrder != -1):
-	    @_mTotalRel += 1
-	    if @_mTotalRel > MAX_REL_THRESHOLD:
-	      @_mDone = true
-	      break
-	    end
-	    @_mRelSample[jp2CharContext[@_mLastCharOrder][order]] += 1
-	  end
-	  @_mLastCharOrder = order
-	end
+        order, charLen = get_order(aBuf[i...i+2])
+        i += charLen
+        if i > aLen
+          @_mNeedToSkipCharNum = i - aLen
+          @_mLastCharOrder = -1
+        else
+          if (order != -1) and (@_mLastCharOrder != -1)
+            @_mTotalRel += 1
+            if @_mTotalRel > MAX_REL_THRESHOLD
+              @_mDone = true
+              break
+            end
+            @_mRelSample[jp2CharContext[@_mLastCharOrder][order]] += 1
+          end
+          @_mLastCharOrder = order
+        end
       end
     end
@@ -169,10 +169,10 @@ module CharDet
     def get_confidence
       # This is just one way to calculate confidence. It works well for me.
-      if @_mTotalRel > MINIMUM_DATA_THRESHOLD:
-	return (@_mTotalRel - @_mRelSample[0]) / @_mTotalRel
+      if @_mTotalRel > MINIMUM_DATA_THRESHOLD
+        return (@_mTotalRel - @_mRelSample[0]) / @_mTotalRel
       else
-	return DONT_KNOW
+        return DONT_KNOW
       end
     end
@@ -188,15 +188,15 @@ module CharDet
       # find out current char's byte length
       aStr = aStr[0..1].join if aStr.class == Array
       if ((aStr[0..0] >= "\x81") and (aStr[0..0] <= "\x9F")) or ((aStr[0..0] >= "\xE0") and (aStr[0..0] <= "\xFC"))
-	charLen = 2
+        charLen = 2
       else
-	charLen = 1
+        charLen = 1
       end
       # return its order if it is hiragana
       if aStr.length > 1
-	if (aStr[0..0] == "\202") and (aStr[1..1] >= "\x9F") and (aStr[1..1] <= "\xF1")
-	  return aStr[1] - 0x9F, charLen
-	end
+        if (aStr[0..0] == "\202") and (aStr[1..1] >= "\x9F") and (aStr[1..1] <= "\xF1")
+          return aStr[1] - 0x9F, charLen
+        end
       end
       return -1, charLen
@@ -208,19 +208,19 @@ module CharDet
       return -1, 1 unless aStr
       # find out current char's byte length
       aStr = aStr[0..1].join if aStr.class == Array
-      if (aStr[0..0] == "\x8E") or ((aStr[0..0] >= "\xA1") and (aStr[0..0] <= "\xFE")):
-	charLen = 2
+      if (aStr[0..0] == "\x8E") or ((aStr[0..0] >= "\xA1") and (aStr[0..0] <= "\xFE"))
+        charLen = 2
       elsif aStr[0..0] == "\x8F"
-	charLen = 3
+        charLen = 3
       else
-	charLen = 1
+        charLen = 1
       end
       # return its order if it is hiragana
       if aStr.length > 1
-	if (aStr[0..0] == "\xA4") and (aStr[1..1] >= "\xA1") and (aStr[1..1] <= "\xF3")
-	  return aStr[1] - 0xA1, charLen
-	end
+        if (aStr[0..0] == "\xA4") and (aStr[1..1] >= "\xA1") and (aStr[1..1] <= "\xF3")
+          return aStr[1] - 0xA1, charLen
+        end
       end
       return -1, charLen

data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/latin1prober.rb CHANGED

@@ -110,15 +110,15 @@ module CharDet
     def feed(aBuf)
       aBuf = filter_with_english_letters(aBuf)
       aBuf.each_byte do |b|
-	c = b.chr
-	charClass = Latin1_CharToClass[c[0]]
-	freq = Latin1ClassModel[(@_mLastCharClass * CLASS_NUM) + charClass]
-	if freq == 0
-	  @_mState = ENotMe
-	  break
-	end
-	@_mFreqCounter[freq] += 1
-	@_mLastCharClass = charClass
+        c = b.chr
+        charClass = Latin1_CharToClass[c[0]]
+        freq = Latin1ClassModel[(@_mLastCharClass * CLASS_NUM) + charClass]
+        if freq == 0
+          @_mState = ENotMe
+          break
+        end
+        @_mFreqCounter[freq] += 1
+        @_mLastCharClass = charClass
       end
       return get_state()
@@ -126,17 +126,17 @@ module CharDet
     def get_confidence
       if get_state() == ENotMe
-	return 0.01
+        return 0.01
       end
       total = @_mFreqCounter.inject{|a,b| a+b}
       if total < 0.01
-	confidence = 0.0
+        confidence = 0.0
       else
-	confidence = (@_mFreqCounter[3] / total) - (@_mFreqCounter[1] * 20.0 / total)
+        confidence = (@_mFreqCounter[3] / total) - (@_mFreqCounter[1] * 20.0 / total)
       end
       if confidence < 0.0
-	confidence = 0.0
+        confidence = 0.0
       end
       # lower the confidence of latin1 so that other more accurate detector
       # can take priority.

data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcharsetprober.rb CHANGED

@@ -40,10 +40,10 @@ module CharDet
     def reset
       super
       if @_mCodingSM
-	@_mCodingSM.reset()
+        @_mCodingSM.reset()
       end
       if @_mDistributionAnalyzer
-	@_mDistributionAnalyzer.reset()
+        @_mDistributionAnalyzer.reset()
       end
       @_mLastChar = "\x00\x00"
     end
@@ -54,30 +54,30 @@ module CharDet
     def feed(aBuf)
       aLen = aBuf.length
       for i in (0...aLen)
-	codingState = @_mCodingSM.next_state(aBuf[i..i])
-	if codingState == EError
-	  $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
-	  @_mState = ENotMe
-	  break
-	elsif codingState == EItsMe
-	  @_mState = EFoundIt
-	  break
-	elsif codingState == EStart
-	  charLen = @_mCodingSM.get_current_charlen()
-	  if i == 0
-	    @_mLastChar[1] = aBuf[0..0]
-	    @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
-	  else
-	    @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
-	  end
-	end
+        codingState = @_mCodingSM.next_state(aBuf[i..i])
+        if codingState == EError
+          $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
+          @_mState = ENotMe
+          break
+        elsif codingState == EItsMe
+          @_mState = EFoundIt
+          break
+        elsif codingState == EStart
+          charLen = @_mCodingSM.get_current_charlen()
+          if i == 0
+            @_mLastChar[1] = aBuf[0..0]
+            @_mDistributionAnalyzer.feed(@_mLastChar, charLen)
+          else
+            @_mDistributionAnalyzer.feed(aBuf[i-1...i+1], charLen)
+          end
+        end
       end
       @_mLastChar[0] = aBuf[aLen-1..aLen-1]
       if get_state() == EDetecting
-	if @_mDistributionAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
-	  @_mState = EFoundIt
-	end
+        if @_mDistributionAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
+          @_mState = EFoundIt
+        end
       end
       return get_state()
     end

data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcsgroupprober.rb CHANGED

@@ -32,15 +32,13 @@ module CharDet
   class MBCSGroupProber < CharSetGroupProber
     def initialize
       super
-      @_mProbers = [
-	UTF8Prober.new,
-	SJISProber.new,
-	EUCJPProber.new,
-	GB2312Prober.new,
-	EUCKRProber.new,
-	Big5Prober.new,
-	EUCTWProber.new
-      ]
+      @_mProbers = [ UTF8Prober.new,
+                     SJISProber.new,
+                     EUCJPProber.new,
+                     GB2312Prober.new,
+                     EUCKRProber.new,
+                     Big5Prober.new,
+                     EUCTWProber.new ]
       reset()
     end
   end

data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/mbcssm.rb CHANGED

@@ -73,10 +73,10 @@ module CharDet
   Big5CharLenTable = [0, 1, 1, 2, 0]
   Big5SMModel = {'classTable' =>  BIG5_cls,
-	       'classFactor' =>  5,
-	       'stateTable' =>  BIG5_st,
-	       'charLenTable' =>  Big5CharLenTable,
-	       'name' =>  'Big5'
+                 'classFactor' =>  5,
+                 'stateTable' =>  BIG5_st,
+                 'charLenTable' =>  Big5CharLenTable,
+                 'name' =>  'Big5'
   }
   # EUC-JP
@@ -127,10 +127,10 @@ module CharDet
   EUCJPCharLenTable = [2, 2, 2, 3, 1, 0]
   EUCJPSMModel = {'classTable' =>  EUCJP_cls,
-		'classFactor' =>  6,
-		'stateTable' =>  EUCJP_st,
-		'charLenTable' =>  EUCJPCharLenTable,
-		'name' =>  'EUC-JP'
+                  'classFactor' =>  6,
+                  'stateTable' =>  EUCJP_st,
+                  'charLenTable' =>  EUCJPCharLenTable,
+                  'name' =>  'EUC-JP'
   }
   # EUC-KR
@@ -178,10 +178,10 @@ module CharDet
   EUCKRCharLenTable = [0, 1, 2, 0]
   EUCKRSMModel = {'classTable' =>  EUCKR_cls,
-		'classFactor' =>  4,
-		'stateTable' =>  EUCKR_st,
-		'charLenTable' =>  EUCKRCharLenTable,
-		'name' =>  'EUC-KR'
+                  'classFactor' =>  4,
+                  'stateTable' =>  EUCKR_st,
+                  'charLenTable' =>  EUCKRCharLenTable,
+                  'name' =>  'EUC-KR'
   }
   # EUC-TW
@@ -233,10 +233,10 @@ module CharDet
   EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3]
   EUCTWSMModel = {'classTable' =>  EUCTW_cls,
-		'classFactor' =>  7,
-		'stateTable' =>  EUCTW_st,
-		'charLenTable' =>  EUCTWCharLenTable,
-		'name' =>  'x-euc-tw'
+                  'classFactor' =>  7,
+                  'stateTable' =>  EUCTW_st,
+                  'charLenTable' =>  EUCTWCharLenTable,
+                  'name' =>  'x-euc-tw'
   }
   # GB2312
@@ -293,10 +293,10 @@ module CharDet
   GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2]
   GB2312SMModel = {'classTable' =>  GB2312_cls,
-		  'classFactor' =>  7,
-		  'stateTable' =>  GB2312_st,
-		  'charLenTable' =>  GB2312CharLenTable,
-		  'name' =>  'GB2312'
+                   'classFactor' =>  7,
+                   'stateTable' =>  GB2312_st,
+                   'charLenTable' =>  GB2312CharLenTable,
+                   'name' =>  'GB2312'
   }
   # Shift_JIS
@@ -347,10 +347,10 @@ module CharDet
   SJISCharLenTable = [0, 1, 1, 2, 0, 0]
   SJISSMModel = {'classTable' =>  SJIS_cls,
-	       'classFactor' =>  6,
-	       'stateTable' =>  SJIS_st,
-	       'charLenTable' =>  SJISCharLenTable,
-	       'name' =>  'Shift_JIS'
+                 'classFactor' =>  6,
+                 'stateTable' =>  SJIS_st,
+                 'charLenTable' =>  SJISCharLenTable,
+                 'name' =>  'Shift_JIS'
   }
   # UCS2-BE
@@ -403,10 +403,10 @@ module CharDet
   UCS2BECharLenTable = [2, 2, 2, 0, 2, 2]
   UCS2BESMModel = {'classTable' =>  UCS2BE_cls,
-		 'classFactor' =>  6,
-		 'stateTable' =>  UCS2BE_st,
-		 'charLenTable' =>  UCS2BECharLenTable,
-		 'name' =>  'UTF-16BE'
+                   'classFactor' =>  6,
+                   'stateTable' =>  UCS2BE_st,
+                   'charLenTable' =>  UCS2BECharLenTable,
+                   'name' =>  'UTF-16BE'
   }
   # UCS2-LE
@@ -459,10 +459,10 @@ module CharDet
   UCS2LECharLenTable = [2, 2, 2, 2, 2, 2]
   UCS2LESMModel = {'classTable' =>  UCS2LE_cls,
-		 'classFactor' =>  6,
-		 'stateTable' =>  UCS2LE_st,
-		 'charLenTable' =>  UCS2LECharLenTable,
-		 'name' =>  'UTF-16LE'
+                   'classFactor' =>  6,
+                   'stateTable' =>  UCS2LE_st,
+                   'charLenTable' =>  UCS2LECharLenTable,
+                   'name' =>  'UTF-16LE'
   }
   # UTF-8
@@ -534,9 +534,9 @@ module CharDet
   UTF8CharLenTable = [0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6]
   UTF8SMModel = {'classTable' =>  UTF8_cls,
-	       'classFactor' =>  16,
-	       'stateTable' =>  UTF8_st,
-	       'charLenTable' =>  UTF8CharLenTable,
-	       'name' =>  'UTF-8'
+                 'classFactor' =>  16,
+                 'stateTable' =>  UTF8_st,
+                 'charLenTable' =>  UTF8CharLenTable,
+                 'name' =>  'UTF-8'
   }
 end

data/lib/tmail/vendor/rchardet-1.3/lib/rchardet/sbcharsetprober.rb CHANGED

@@ -57,51 +57,51 @@ module CharDet
     def get_charset_name
       if @_mNameProber
-	return @_mNameProber.get_charset_name()
+        return @_mNameProber.get_charset_name()
       else
-	return @_mModel['charsetName']
+        return @_mModel['charsetName']
       end
     end
     def feed(aBuf)
       if not @_mModel['keepEnglishLetter']
-	aBuf = filter_without_english_letters(aBuf)
+        aBuf = filter_without_english_letters(aBuf)
       end
       aLen = aBuf.length
       if not aLen
-	return get_state()
+        return get_state()
       end
       aBuf.each_byte do |b|
-	c = b.chr
-	order = @_mModel['charToOrderMap'][c[0]]
-	if order < SYMBOL_CAT_ORDER
-	  @_mTotalChar += 1
-	end
-	if order < SAMPLE_SIZE
-	  @_mFreqChar += 1
-	  if @_mLastOrder < SAMPLE_SIZE
-	    @_mTotalSeqs += 1
-	    if not @_mReversed
-	      @_mSeqCounters[@_mModel['precedenceMatrix'][(@_mLastOrder * SAMPLE_SIZE) + order]] += 1
-	    else # reverse the order of the letters in the lookup
-	      @_mSeqCounters[@_mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + @_mLastOrder]] += 1
-	    end
-	  end
-	end
-	@_mLastOrder = order
+        c = b.chr
+        order = @_mModel['charToOrderMap'][c[0]]
+        if order < SYMBOL_CAT_ORDER
+          @_mTotalChar += 1
+        end
+        if order < SAMPLE_SIZE
+          @_mFreqChar += 1
+          if @_mLastOrder < SAMPLE_SIZE
+            @_mTotalSeqs += 1
+            if not @_mReversed
+              @_mSeqCounters[@_mModel['precedenceMatrix'][(@_mLastOrder * SAMPLE_SIZE) + order]] += 1
+            else # reverse the order of the letters in the lookup
+              @_mSeqCounters[@_mModel['precedenceMatrix'][(order * SAMPLE_SIZE) + @_mLastOrder]] += 1
+            end
+          end
+        end
+        @_mLastOrder = order
       end
       if get_state() == EDetecting
-	if @_mTotalSeqs > SB_ENOUGH_REL_THRESHOLD
-	  cf = get_confidence()
-	  if cf > POSITIVE_SHORTCUT_THRESHOLD
-	    $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
-	    @_mState = EFoundIt
-	  elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
-	    $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
-	    @_mState = ENotMe
-	  end
-	end
+        if @_mTotalSeqs > SB_ENOUGH_REL_THRESHOLD
+          cf = get_confidence()
+          if cf > POSITIVE_SHORTCUT_THRESHOLD
+            $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, we have a winner\n" if $debug
+            @_mState = EFoundIt
+          elsif cf < NEGATIVE_SHORTCUT_THRESHOLD
+            $stderr << "#{@_mModel['charsetName']} confidence = #{cf}, below negative shortcut threshold #{NEGATIVE_SHORTCUT_THRESHOLD}\n" if $debug
+            @_mState = ENotMe
+          end
+        end
       end
       return get_state()
@@ -110,13 +110,13 @@ module CharDet
     def get_confidence
       r = 0.01
       if @_mTotalSeqs > 0
-	#            print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio']
-	r = (1.0 * @_mSeqCounters[POSITIVE_CAT]) / @_mTotalSeqs / @_mModel['mTypicalPositiveRatio']
-	#            print r, self._mFreqChar, self._mTotalChar
-	r = r * @_mFreqChar / @_mTotalChar
-	if r >= 1.0
-	  r = 0.99
-	end
+        #            print self._mSeqCounters[POSITIVE_CAT], self._mTotalSeqs, self._mModel['mTypicalPositiveRatio']
+        r = (1.0 * @_mSeqCounters[POSITIVE_CAT]) / @_mTotalSeqs / @_mModel['mTypicalPositiveRatio']
+        #            print r, self._mFreqChar, self._mTotalChar
+        r = r * @_mFreqChar / @_mTotalChar
+        if r >= 1.0
+          r = 0.99
+        end
       end
       return r
     end