rchardet 1.3.1 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rchardet.rb +1 -3
 - data/lib/rchardet/big5freq.rb +2 -2
 - data/lib/rchardet/big5prober.rb +2 -2
 - data/lib/rchardet/chardistribution.rb +74 -69
 - data/lib/rchardet/charsetgroupprober.rb +50 -52
 - data/lib/rchardet/charsetprober.rb +2 -7
 - data/lib/rchardet/codingstatemachine.rb +14 -13
 - data/lib/rchardet/constants.rb +0 -0
 - data/lib/rchardet/escprober.rb +34 -34
 - data/lib/rchardet/escsm.rb +33 -32
 - data/lib/rchardet/eucjpprober.rb +28 -28
 - data/lib/rchardet/euckrfreq.rb +2 -1
 - data/lib/rchardet/euckrprober.rb +2 -2
 - data/lib/rchardet/euctwfreq.rb +2 -1
 - data/lib/rchardet/euctwprober.rb +2 -2
 - data/lib/rchardet/gb2312freq.rb +2 -2
 - data/lib/rchardet/gb2312prober.rb +2 -2
 - data/lib/rchardet/hebrewprober.rb +40 -40
 - data/lib/rchardet/jisfreq.rb +2 -1
 - data/lib/rchardet/jpcntx.rb +131 -130
 - data/lib/rchardet/langbulgarianmodel.rb +6 -6
 - data/lib/rchardet/langcyrillicmodel.rb +13 -13
 - data/lib/rchardet/langgreekmodel.rb +5 -5
 - data/lib/rchardet/langhebrewmodel.rb +3 -3
 - data/lib/rchardet/langhungarianmodel.rb +5 -5
 - data/lib/rchardet/langthaimodel.rb +3 -3
 - data/lib/rchardet/latin1prober.rb +18 -18
 - data/lib/rchardet/mbcharsetprober.rb +30 -30
 - data/lib/rchardet/mbcsgroupprober.rb +9 -9
 - data/lib/rchardet/mbcssm.rb +72 -72
 - data/lib/rchardet/sbcharsetprober.rb +48 -50
 - data/lib/rchardet/sbcsgroupprober.rb +16 -16
 - data/lib/rchardet/sjisprober.rb +28 -28
 - data/lib/rchardet/universaldetector.rb +92 -90
 - data/lib/rchardet/utf8prober.rb +25 -25
 - data/lib/rchardet/version.rb +3 -0
 - metadata +30 -47
 - data/COPYING +0 -504
 - data/README +0 -12
 
| 
         @@ -31,36 +31,37 @@ module CharDet 
     | 
|
| 
       31 
31 
     | 
    
         
             
                attr_accessor :active
         
     | 
| 
       32 
32 
     | 
    
         | 
| 
       33 
33 
     | 
    
         
             
                def initialize(sm)
         
     | 
| 
       34 
     | 
    
         
            -
                  @ 
     | 
| 
       35 
     | 
    
         
            -
                  @ 
     | 
| 
       36 
     | 
    
         
            -
                  @ 
     | 
| 
      
 34 
     | 
    
         
            +
                  @model = sm
         
     | 
| 
      
 35 
     | 
    
         
            +
                  @currentBytePos = 0
         
     | 
| 
      
 36 
     | 
    
         
            +
                  @currentCharLen = 0
         
     | 
| 
       37 
37 
     | 
    
         
             
                  reset()
         
     | 
| 
       38 
38 
     | 
    
         
             
                end
         
     | 
| 
       39 
39 
     | 
    
         | 
| 
       40 
40 
     | 
    
         
             
                def reset
         
     | 
| 
       41 
     | 
    
         
            -
                  @ 
     | 
| 
      
 41 
     | 
    
         
            +
                  @currentState = EStart
         
     | 
| 
       42 
42 
     | 
    
         
             
                end
         
     | 
| 
       43 
43 
     | 
    
         | 
| 
       44 
44 
     | 
    
         
             
                def next_state(c)
         
     | 
| 
       45 
45 
     | 
    
         
             
                  # for each byte we get its class
         
     | 
| 
       46 
46 
     | 
    
         
             
                  # if it is first byte, we also get byte length
         
     | 
| 
       47 
     | 
    
         
            -
                   
     | 
| 
       48 
     | 
    
         
            -
                   
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
      
 47 
     | 
    
         
            +
                  b = c.bytes.first
         
     | 
| 
      
 48 
     | 
    
         
            +
                  byteCls = @model['classTable'][b]
         
     | 
| 
      
 49 
     | 
    
         
            +
                  if @currentState == EStart
         
     | 
| 
      
 50 
     | 
    
         
            +
                    @currentBytePos = 0
         
     | 
| 
      
 51 
     | 
    
         
            +
                    @currentCharLen = @model['charLenTable'][byteCls]
         
     | 
| 
       51 
52 
     | 
    
         
             
                  end
         
     | 
| 
       52 
53 
     | 
    
         
             
                  # from byte's class and stateTable, we get its next state
         
     | 
| 
       53 
     | 
    
         
            -
                  @ 
     | 
| 
       54 
     | 
    
         
            -
                  @ 
     | 
| 
       55 
     | 
    
         
            -
                  return @ 
     | 
| 
      
 54 
     | 
    
         
            +
                  @currentState = @model['stateTable'][@currentState * @model['classFactor'] + byteCls]
         
     | 
| 
      
 55 
     | 
    
         
            +
                  @currentBytePos += 1
         
     | 
| 
      
 56 
     | 
    
         
            +
                  return @currentState
         
     | 
| 
       56 
57 
     | 
    
         
             
                end
         
     | 
| 
       57 
58 
     | 
    
         | 
| 
       58 
59 
     | 
    
         
             
                def get_current_charlen
         
     | 
| 
       59 
     | 
    
         
            -
                  return @ 
     | 
| 
      
 60 
     | 
    
         
            +
                  return @currentCharLen
         
     | 
| 
       60 
61 
     | 
    
         
             
                end
         
     | 
| 
       61 
62 
     | 
    
         | 
| 
       62 
63 
     | 
    
         
             
                def get_coding_state_machine
         
     | 
| 
       63 
     | 
    
         
            -
                  return @ 
     | 
| 
      
 64 
     | 
    
         
            +
                  return @model['name']
         
     | 
| 
       64 
65 
     | 
    
         
             
                end
         
     | 
| 
       65 
66 
     | 
    
         
             
              end
         
     | 
| 
       66 
67 
     | 
    
         
             
            end
         
     | 
    
        data/lib/rchardet/constants.rb
    CHANGED
    
    | 
         
            File without changes
         
     | 
    
        data/lib/rchardet/escprober.rb
    CHANGED
    
    | 
         @@ -30,58 +30,58 @@ module CharDet 
     | 
|
| 
       30 
30 
     | 
    
         
             
              class EscCharSetProber < CharSetProber
         
     | 
| 
       31 
31 
     | 
    
         
             
                def initialize
         
     | 
| 
       32 
32 
     | 
    
         
             
                  super()
         
     | 
| 
       33 
     | 
    
         
            -
                  @ 
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
      
 33 
     | 
    
         
            +
                  @codingSM = [ 
         
     | 
| 
      
 34 
     | 
    
         
            +
                                 CodingStateMachine.new(HZSMModel),
         
     | 
| 
      
 35 
     | 
    
         
            +
                                 CodingStateMachine.new(ISO2022CNSMModel),
         
     | 
| 
      
 36 
     | 
    
         
            +
                                 CodingStateMachine.new(ISO2022JPSMModel),
         
     | 
| 
      
 37 
     | 
    
         
            +
                                 CodingStateMachine.new(ISO2022KRSMModel)
         
     | 
| 
      
 38 
     | 
    
         
            +
                                ]
         
     | 
| 
       39 
39 
     | 
    
         
             
                  reset()
         
     | 
| 
       40 
40 
     | 
    
         
             
                end
         
     | 
| 
       41 
41 
     | 
    
         | 
| 
       42 
42 
     | 
    
         
             
                def reset
         
     | 
| 
       43 
43 
     | 
    
         
             
                  super()
         
     | 
| 
       44 
     | 
    
         
            -
                  for codingSM in @ 
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
      
 44 
     | 
    
         
            +
                  for codingSM in @codingSM
         
     | 
| 
      
 45 
     | 
    
         
            +
                    next if !codingSM
         
     | 
| 
      
 46 
     | 
    
         
            +
                    codingSM.active = true
         
     | 
| 
      
 47 
     | 
    
         
            +
                    codingSM.reset()
         
     | 
| 
       48 
48 
     | 
    
         
             
                  end
         
     | 
| 
       49 
     | 
    
         
            -
                  @ 
     | 
| 
       50 
     | 
    
         
            -
                  @ 
     | 
| 
      
 49 
     | 
    
         
            +
                  @activeSM = @codingSM.length
         
     | 
| 
      
 50 
     | 
    
         
            +
                  @detectedCharset = nil
         
     | 
| 
       51 
51 
     | 
    
         
             
                end
         
     | 
| 
       52 
52 
     | 
    
         | 
| 
       53 
53 
     | 
    
         
             
                def get_charset_name
         
     | 
| 
       54 
     | 
    
         
            -
                  return @ 
     | 
| 
      
 54 
     | 
    
         
            +
                  return @detectedCharset
         
     | 
| 
       55 
55 
     | 
    
         
             
                end
         
     | 
| 
       56 
56 
     | 
    
         | 
| 
       57 
57 
     | 
    
         
             
                def get_confidence
         
     | 
| 
       58 
     | 
    
         
            -
                  if @ 
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
      
 58 
     | 
    
         
            +
                  if @detectedCharset
         
     | 
| 
      
 59 
     | 
    
         
            +
                    return 0.99
         
     | 
| 
       60 
60 
     | 
    
         
             
                  else
         
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
      
 61 
     | 
    
         
            +
                    return 0.00
         
     | 
| 
       62 
62 
     | 
    
         
             
                  end
         
     | 
| 
       63 
63 
     | 
    
         
             
                end
         
     | 
| 
       64 
64 
     | 
    
         | 
| 
       65 
65 
     | 
    
         
             
                def feed(aBuf)
         
     | 
| 
       66 
66 
     | 
    
         
             
                  aBuf.each_byte do |b|
         
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
             
     | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
             
     | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
      
 67 
     | 
    
         
            +
                    c = b.chr
         
     | 
| 
      
 68 
     | 
    
         
            +
                    for codingSM in @codingSM
         
     | 
| 
      
 69 
     | 
    
         
            +
                      next unless codingSM
         
     | 
| 
      
 70 
     | 
    
         
            +
                      next unless codingSM.active
         
     | 
| 
      
 71 
     | 
    
         
            +
                      codingState = codingSM.next_state(c)
         
     | 
| 
      
 72 
     | 
    
         
            +
                      if codingState == EError
         
     | 
| 
      
 73 
     | 
    
         
            +
                        codingSM.active = false
         
     | 
| 
      
 74 
     | 
    
         
            +
                        @activeSM -= 1
         
     | 
| 
      
 75 
     | 
    
         
            +
                        if @activeSM <= 0
         
     | 
| 
      
 76 
     | 
    
         
            +
                          @state = ENotMe
         
     | 
| 
      
 77 
     | 
    
         
            +
                          return get_state()
         
     | 
| 
      
 78 
     | 
    
         
            +
                        end
         
     | 
| 
      
 79 
     | 
    
         
            +
                      elsif codingState == EItsMe
         
     | 
| 
      
 80 
     | 
    
         
            +
                        @state = EFoundIt
         
     | 
| 
      
 81 
     | 
    
         
            +
                        @detectedCharset = codingSM.get_coding_state_machine()
         
     | 
| 
      
 82 
     | 
    
         
            +
                        return get_state()
         
     | 
| 
      
 83 
     | 
    
         
            +
                      end
         
     | 
| 
      
 84 
     | 
    
         
            +
                    end
         
     | 
| 
       85 
85 
     | 
    
         
             
                  end
         
     | 
| 
       86 
86 
     | 
    
         | 
| 
       87 
87 
     | 
    
         
             
                  return get_state()
         
     | 
    
        data/lib/rchardet/escsm.rb
    CHANGED
    
    | 
         @@ -7,6 +7,7 @@ 
     | 
|
| 
       7 
7 
     | 
    
         
             
            # the Initial Developer. All Rights Reserved.
         
     | 
| 
       8 
8 
     | 
    
         
             
            #
         
     | 
| 
       9 
9 
     | 
    
         
             
            # Contributor(s):
         
     | 
| 
      
 10 
     | 
    
         
            +
            #   Jeff Hodges - port to Ruby
         
     | 
| 
       10 
11 
     | 
    
         
             
            #   Mark Pilgrim - port to Python
         
     | 
| 
       11 
12 
     | 
    
         
             
            #
         
     | 
| 
       12 
13 
     | 
    
         
             
            # This library is free software; you can redistribute it and/or
         
     | 
| 
         @@ -59,7 +60,7 @@ module CharDet 
     | 
|
| 
       59 
60 
     | 
    
         
             
                1,1,1,1,1,1,1,1,  # e8 - ef 
         
     | 
| 
       60 
61 
     | 
    
         
             
                1,1,1,1,1,1,1,1,  # f0 - f7 
         
     | 
| 
       61 
62 
     | 
    
         
             
                1,1,1,1,1,1,1,1,  # f8 - ff 
         
     | 
| 
       62 
     | 
    
         
            -
              ]
         
     | 
| 
      
 63 
     | 
    
         
            +
              ].freeze
         
     | 
| 
       63 
64 
     | 
    
         | 
| 
       64 
65 
     | 
    
         
             
              HZ_st = [
         
     | 
| 
       65 
66 
     | 
    
         
             
                EStart,EError,     3,EStart,EStart,EStart,EError,EError,# 00-07 
         
     | 
| 
         @@ -68,16 +69,16 @@ module CharDet 
     | 
|
| 
       68 
69 
     | 
    
         
             
                5,EError,     6,EError,     5,     5,     4,EError,# 18-1f 
         
     | 
| 
       69 
70 
     | 
    
         
             
                4,EError,     4,     4,     4,EError,     4,EError,# 20-27 
         
     | 
| 
       70 
71 
     | 
    
         
             
                4,EItsMe,EStart,EStart,EStart,EStart,EStart,EStart,# 28-2f 
         
     | 
| 
       71 
     | 
    
         
            -
              ]
         
     | 
| 
      
 72 
     | 
    
         
            +
              ].freeze
         
     | 
| 
       72 
73 
     | 
    
         | 
| 
       73 
     | 
    
         
            -
              HZCharLenTable = [0, 0, 0, 0, 0, 0]
         
     | 
| 
      
 74 
     | 
    
         
            +
              HZCharLenTable = [0, 0, 0, 0, 0, 0].freeze
         
     | 
| 
       74 
75 
     | 
    
         | 
| 
       75 
76 
     | 
    
         
             
              HZSMModel = {'classTable' => HZ_cls,
         
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
            }
         
     | 
| 
      
 77 
     | 
    
         
            +
                'classFactor' => 6,
         
     | 
| 
      
 78 
     | 
    
         
            +
                'stateTable' => HZ_st,
         
     | 
| 
      
 79 
     | 
    
         
            +
                'charLenTable' => HZCharLenTable,
         
     | 
| 
      
 80 
     | 
    
         
            +
                'name' => "HZ-GB-2312"
         
     | 
| 
      
 81 
     | 
    
         
            +
              }.freeze
         
     | 
| 
       81 
82 
     | 
    
         | 
| 
       82 
83 
     | 
    
         
             
            ISO2022CN_cls = [
         
     | 
| 
       83 
84 
     | 
    
         
             
            2,0,0,0,0,0,0,0,  # 00 - 07 
         
     | 
| 
         @@ -112,7 +113,7 @@ ISO2022CN_cls = [ 
     | 
|
| 
       112 
113 
     | 
    
         
             
            2,2,2,2,2,2,2,2,  # e8 - ef 
         
     | 
| 
       113 
114 
     | 
    
         
             
            2,2,2,2,2,2,2,2,  # f0 - f7 
         
     | 
| 
       114 
115 
     | 
    
         
             
            2,2,2,2,2,2,2,2,  # f8 - ff 
         
     | 
| 
       115 
     | 
    
         
            -
            ]
         
     | 
| 
      
 116 
     | 
    
         
            +
            ].freeze
         
     | 
| 
       116 
117 
     | 
    
         | 
| 
       117 
118 
     | 
    
         
             
            ISO2022CN_st = [
         
     | 
| 
       118 
119 
     | 
    
         
             
            EStart,     3,EError,EStart,EStart,EStart,EStart,EStart,# 00-07 
         
     | 
| 
         @@ -123,16 +124,16 @@ EError,EError,EError,EItsMe,EError,EError,EError,EError,# 20-27 
     | 
|
| 
       123 
124 
     | 
    
         
             
                 5,     6,EError,EError,EError,EError,EError,EError,# 28-2f 
         
     | 
| 
       124 
125 
     | 
    
         
             
            EError,EError,EError,EItsMe,EError,EError,EError,EError,# 30-37 
         
     | 
| 
       125 
126 
     | 
    
         
             
            EError,EError,EError,EError,EError,EItsMe,EError,EStart,# 38-3f 
         
     | 
| 
       126 
     | 
    
         
            -
            ]
         
     | 
| 
      
 127 
     | 
    
         
            +
            ].freeze
         
     | 
| 
       127 
128 
     | 
    
         | 
| 
       128 
     | 
    
         
            -
            ISO2022CNCharLenTable = [0, 0, 0, 0, 0, 0, 0, 0, 0]
         
     | 
| 
      
 129 
     | 
    
         
            +
            ISO2022CNCharLenTable = [0, 0, 0, 0, 0, 0, 0, 0, 0].freeze
         
     | 
| 
       129 
130 
     | 
    
         | 
| 
       130 
131 
     | 
    
         
             
            ISO2022CNSMModel = {'classTable' => ISO2022CN_cls,
         
     | 
| 
       131 
     | 
    
         
            -
             
     | 
| 
       132 
     | 
    
         
            -
             
     | 
| 
       133 
     | 
    
         
            -
             
     | 
| 
       134 
     | 
    
         
            -
             
     | 
| 
       135 
     | 
    
         
            -
            }
         
     | 
| 
      
 132 
     | 
    
         
            +
                'classFactor' => 9,
         
     | 
| 
      
 133 
     | 
    
         
            +
                'stateTable' => ISO2022CN_st,
         
     | 
| 
      
 134 
     | 
    
         
            +
                'charLenTable' => ISO2022CNCharLenTable,
         
     | 
| 
      
 135 
     | 
    
         
            +
                'name' => "ISO-2022-CN"
         
     | 
| 
      
 136 
     | 
    
         
            +
              }.freeze
         
     | 
| 
       136 
137 
     | 
    
         | 
| 
       137 
138 
     | 
    
         
             
            ISO2022JP_cls = [
         
     | 
| 
       138 
139 
     | 
    
         
             
            2,0,0,0,0,0,0,0,  # 00 - 07 
         
     | 
| 
         @@ -167,7 +168,7 @@ ISO2022JP_cls = [ 
     | 
|
| 
       167 
168 
     | 
    
         
             
            2,2,2,2,2,2,2,2,  # e8 - ef 
         
     | 
| 
       168 
169 
     | 
    
         
             
            2,2,2,2,2,2,2,2,  # f0 - f7 
         
     | 
| 
       169 
170 
     | 
    
         
             
            2,2,2,2,2,2,2,2,  # f8 - ff 
         
     | 
| 
       170 
     | 
    
         
            -
            ]
         
     | 
| 
      
 171 
     | 
    
         
            +
            ].freeze
         
     | 
| 
       171 
172 
     | 
    
         | 
| 
       172 
173 
     | 
    
         
             
            ISO2022JP_st = [ 
         
     | 
| 
       173 
174 
     | 
    
         
             
            EStart,     3,EError,EStart,EStart,EStart,EStart,EStart,# 00-07 
         
     | 
| 
         @@ -179,16 +180,16 @@ EError,EError,EError,     6,EItsMe,EError,EItsMe,EError,# 28-2f 
     | 
|
| 
       179 
180 
     | 
    
         
             
            EError,EError,EError,EError,EError,EError,EItsMe,EItsMe,# 30-37 
         
     | 
| 
       180 
181 
     | 
    
         
             
            EError,EError,EError,EItsMe,EError,EError,EError,EError,# 38-3f 
         
     | 
| 
       181 
182 
     | 
    
         
             
            EError,EError,EError,EError,EItsMe,EError,EStart,EStart,# 40-47 
         
     | 
| 
       182 
     | 
    
         
            -
            ]
         
     | 
| 
      
 183 
     | 
    
         
            +
            ].freeze
         
     | 
| 
       183 
184 
     | 
    
         | 
| 
       184 
     | 
    
         
            -
            ISO2022JPCharLenTable = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
         
     | 
| 
      
 185 
     | 
    
         
            +
            ISO2022JPCharLenTable = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0].freeze
         
     | 
| 
       185 
186 
     | 
    
         | 
| 
       186 
187 
     | 
    
         
             
            ISO2022JPSMModel = {'classTable' => ISO2022JP_cls,
         
     | 
| 
       187 
     | 
    
         
            -
             
     | 
| 
       188 
     | 
    
         
            -
             
     | 
| 
       189 
     | 
    
         
            -
             
     | 
| 
       190 
     | 
    
         
            -
             
     | 
| 
       191 
     | 
    
         
            -
            }
         
     | 
| 
      
 188 
     | 
    
         
            +
                'classFactor' => 10,
         
     | 
| 
      
 189 
     | 
    
         
            +
                'stateTable' => ISO2022JP_st,
         
     | 
| 
      
 190 
     | 
    
         
            +
                'charLenTable' => ISO2022JPCharLenTable,
         
     | 
| 
      
 191 
     | 
    
         
            +
                'name' => "ISO-2022-JP"
         
     | 
| 
      
 192 
     | 
    
         
            +
              }.freeze
         
     | 
| 
       192 
193 
     | 
    
         | 
| 
       193 
194 
     | 
    
         
             
            ISO2022KR_cls = [
         
     | 
| 
       194 
195 
     | 
    
         
             
            2,0,0,0,0,0,0,0,  # 00 - 07 
         
     | 
| 
         @@ -223,7 +224,7 @@ ISO2022KR_cls = [ 
     | 
|
| 
       223 
224 
     | 
    
         
             
            2,2,2,2,2,2,2,2,  # e8 - ef 
         
     | 
| 
       224 
225 
     | 
    
         
             
            2,2,2,2,2,2,2,2,  # f0 - f7 
         
     | 
| 
       225 
226 
     | 
    
         
             
            2,2,2,2,2,2,2,2,  # f8 - ff 
         
     | 
| 
       226 
     | 
    
         
            -
            ]
         
     | 
| 
      
 227 
     | 
    
         
            +
            ].freeze
         
     | 
| 
       227 
228 
     | 
    
         | 
| 
       228 
229 
     | 
    
         
             
            ISO2022KR_st = [ 
         
     | 
| 
       229 
230 
     | 
    
         
             
            EStart,     3,EError,EStart,EStart,EStart,EError,EError,# 00-07 
         
     | 
| 
         @@ -231,14 +232,14 @@ EError,EError,EError,EError,EItsMe,EItsMe,EItsMe,EItsMe,# 08-0f 
     | 
|
| 
       231 
232 
     | 
    
         
             
            EItsMe,EItsMe,EError,EError,EError,     4,EError,EError,# 10-17 
         
     | 
| 
       232 
233 
     | 
    
         
             
            EError,EError,EError,EError,     5,EError,EError,EError,# 18-1f 
         
     | 
| 
       233 
234 
     | 
    
         
             
            EError,EError,EError,EItsMe,EStart,EStart,EStart,EStart,# 20-27 
         
     | 
| 
       234 
     | 
    
         
            -
            ]
         
     | 
| 
      
 235 
     | 
    
         
            +
            ].freeze
         
     | 
| 
       235 
236 
     | 
    
         | 
| 
       236 
     | 
    
         
            -
            ISO2022KRCharLenTable = [0, 0, 0, 0, 0, 0]
         
     | 
| 
      
 237 
     | 
    
         
            +
            ISO2022KRCharLenTable = [0, 0, 0, 0, 0, 0].freeze
         
     | 
| 
       237 
238 
     | 
    
         | 
| 
       238 
239 
     | 
    
         
             
            ISO2022KRSMModel = {'classTable' => ISO2022KR_cls,
         
     | 
| 
       239 
     | 
    
         
            -
             
     | 
| 
       240 
     | 
    
         
            -
             
     | 
| 
       241 
     | 
    
         
            -
             
     | 
| 
       242 
     | 
    
         
            -
             
     | 
| 
       243 
     | 
    
         
            -
            }
         
     | 
| 
      
 240 
     | 
    
         
            +
                'classFactor' => 6,
         
     | 
| 
      
 241 
     | 
    
         
            +
                'stateTable' => ISO2022KR_st,
         
     | 
| 
      
 242 
     | 
    
         
            +
                'charLenTable' => ISO2022KRCharLenTable,
         
     | 
| 
      
 243 
     | 
    
         
            +
                'name' => "ISO-2022-KR"
         
     | 
| 
      
 244 
     | 
    
         
            +
              }.freeze
         
     | 
| 
       244 
245 
     | 
    
         
             
            end
         
     | 
    
        data/lib/rchardet/eucjpprober.rb
    CHANGED
    
    | 
         @@ -30,15 +30,15 @@ module CharDet 
     | 
|
| 
       30 
30 
     | 
    
         
             
              class EUCJPProber < MultiByteCharSetProber
         
     | 
| 
       31 
31 
     | 
    
         
             
                def initialize
         
     | 
| 
       32 
32 
     | 
    
         
             
                  super()
         
     | 
| 
       33 
     | 
    
         
            -
                  @ 
     | 
| 
       34 
     | 
    
         
            -
                  @ 
     | 
| 
       35 
     | 
    
         
            -
                  @ 
     | 
| 
      
 33 
     | 
    
         
            +
                  @codingSM = CodingStateMachine.new(EUCJPSMModel)
         
     | 
| 
      
 34 
     | 
    
         
            +
                  @distributionAnalyzer = EUCJPDistributionAnalysis.new()
         
     | 
| 
      
 35 
     | 
    
         
            +
                  @contextAnalyzer = EUCJPContextAnalysis.new()
         
     | 
| 
       36 
36 
     | 
    
         
             
                  reset
         
     | 
| 
       37 
37 
     | 
    
         
             
                end
         
     | 
| 
       38 
38 
     | 
    
         | 
| 
       39 
39 
     | 
    
         
             
                def reset
         
     | 
| 
       40 
40 
     | 
    
         
             
                  super()
         
     | 
| 
       41 
     | 
    
         
            -
                  @ 
     | 
| 
      
 41 
     | 
    
         
            +
                  @contextAnalyzer.reset()
         
     | 
| 
       42 
42 
     | 
    
         
             
                end
         
     | 
| 
       43 
43 
     | 
    
         | 
| 
       44 
44 
     | 
    
         
             
                def get_charset_name
         
     | 
| 
         @@ -48,40 +48,40 @@ module CharDet 
     | 
|
| 
       48 
48 
     | 
    
         
             
                def feed(aBuf)
         
     | 
| 
       49 
49 
     | 
    
         
             
                  aLen = aBuf.length
         
     | 
| 
       50 
50 
     | 
    
         
             
                  for i in (0...aLen)
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
             
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
             
     | 
| 
       57 
     | 
    
         
            -
             
     | 
| 
       58 
     | 
    
         
            -
             
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
       62 
     | 
    
         
            -
             
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
             
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
             
     | 
| 
      
 51 
     | 
    
         
            +
                    codingState = @codingSM.next_state(aBuf[i, 1])
         
     | 
| 
      
 52 
     | 
    
         
            +
                    if codingState == EError
         
     | 
| 
      
 53 
     | 
    
         
            +
                      $stderr << "#{get_charset_name} prober hit error at byte #{i}\n" if $debug
         
     | 
| 
      
 54 
     | 
    
         
            +
                      @state = ENotMe
         
     | 
| 
      
 55 
     | 
    
         
            +
                      break
         
     | 
| 
      
 56 
     | 
    
         
            +
                    elsif codingState == EItsMe
         
     | 
| 
      
 57 
     | 
    
         
            +
                      @state = EFoundIt
         
     | 
| 
      
 58 
     | 
    
         
            +
                      break
         
     | 
| 
      
 59 
     | 
    
         
            +
                    elsif codingState == EStart
         
     | 
| 
      
 60 
     | 
    
         
            +
                      charLen = @codingSM.get_current_charlen()
         
     | 
| 
      
 61 
     | 
    
         
            +
                      if i == 0
         
     | 
| 
      
 62 
     | 
    
         
            +
                        @lastChar[1] = aBuf[0, 1]
         
     | 
| 
      
 63 
     | 
    
         
            +
                        @contextAnalyzer.feed(@lastChar, charLen)
         
     | 
| 
      
 64 
     | 
    
         
            +
                        @distributionAnalyzer.feed(@lastChar, charLen)
         
     | 
| 
      
 65 
     | 
    
         
            +
                      else
         
     | 
| 
      
 66 
     | 
    
         
            +
                        @contextAnalyzer.feed(aBuf[i-1, 2], charLen)
         
     | 
| 
      
 67 
     | 
    
         
            +
                        @distributionAnalyzer.feed(aBuf[i-1, 2], charLen)
         
     | 
| 
      
 68 
     | 
    
         
            +
                      end
         
     | 
| 
      
 69 
     | 
    
         
            +
                    end
         
     | 
| 
       70 
70 
     | 
    
         
             
                  end
         
     | 
| 
       71 
71 
     | 
    
         | 
| 
       72 
     | 
    
         
            -
                  @ 
     | 
| 
      
 72 
     | 
    
         
            +
                  @lastChar[0] = aBuf[aLen-1, 1]
         
     | 
| 
       73 
73 
     | 
    
         | 
| 
       74 
74 
     | 
    
         
             
                  if get_state() == EDetecting
         
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
      
 75 
     | 
    
         
            +
                    if @contextAnalyzer.got_enough_data() and (get_confidence() > SHORTCUT_THRESHOLD)
         
     | 
| 
      
 76 
     | 
    
         
            +
                      @state = EFoundIt
         
     | 
| 
      
 77 
     | 
    
         
            +
                    end
         
     | 
| 
       78 
78 
     | 
    
         
             
                  end
         
     | 
| 
       79 
79 
     | 
    
         | 
| 
       80 
80 
     | 
    
         
             
                  return get_state()
         
     | 
| 
       81 
81 
     | 
    
         
             
                end
         
     | 
| 
       82 
82 
     | 
    
         | 
| 
       83 
83 
     | 
    
         
             
                def get_confidence
         
     | 
| 
       84 
     | 
    
         
            -
                  l = [@ 
     | 
| 
      
 84 
     | 
    
         
            +
                  l = [@contextAnalyzer.get_confidence,@distributionAnalyzer.get_confidence]
         
     | 
| 
       85 
85 
     | 
    
         
             
                  return l.max
         
     | 
| 
       86 
86 
     | 
    
         
             
                end
         
     | 
| 
       87 
87 
     | 
    
         
             
              end
         
     | 
    
        data/lib/rchardet/euckrfreq.rb
    CHANGED
    
    | 
         @@ -7,6 +7,7 @@ 
     | 
|
| 
       7 
7 
     | 
    
         
             
            # the Initial Developer. All Rights Reserved.
         
     | 
| 
       8 
8 
     | 
    
         
             
            #
         
     | 
| 
       9 
9 
     | 
    
         
             
            # Contributor(s):
         
     | 
| 
      
 10 
     | 
    
         
            +
            #   Jeff Hodges - port to Ruby
         
     | 
| 
       10 
11 
     | 
    
         
             
            #   Mark Pilgrim - port to Python
         
     | 
| 
       11 
12 
     | 
    
         
             
            #
         
     | 
| 
       12 
13 
     | 
    
         
             
            # This library is free software; you can redistribute it and/or
         
     | 
| 
         @@ -592,5 +593,5 @@ EUCKRCharToFreqOrder = [ 
     | 
|
| 
       592 
593 
     | 
    
         
             
            8704,8705,8706,8707,8708,8709,8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,
         
     | 
| 
       593 
594 
     | 
    
         
             
            8720,8721,8722,8723,8724,8725,8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,
         
     | 
| 
       594 
595 
     | 
    
         
             
            8736,8737,8738,8739,8740,8741
         
     | 
| 
       595 
     | 
    
         
            -
            ]
         
     | 
| 
      
 596 
     | 
    
         
            +
            ].freeze
         
     | 
| 
       596 
597 
     | 
    
         
             
            end
         
     | 
    
        data/lib/rchardet/euckrprober.rb
    CHANGED
    
    | 
         @@ -30,8 +30,8 @@ module CharDet 
     | 
|
| 
       30 
30 
     | 
    
         
             
              class EUCKRProber < MultiByteCharSetProber
         
     | 
| 
       31 
31 
     | 
    
         
             
                def initialize
         
     | 
| 
       32 
32 
     | 
    
         
             
                  super()
         
     | 
| 
       33 
     | 
    
         
            -
                  @ 
     | 
| 
       34 
     | 
    
         
            -
                  @ 
     | 
| 
      
 33 
     | 
    
         
            +
                  @codingSM = CodingStateMachine.new(EUCKRSMModel)
         
     | 
| 
      
 34 
     | 
    
         
            +
                  @distributionAnalyzer = EUCKRDistributionAnalysis.new()
         
     | 
| 
       35 
35 
     | 
    
         
             
                  reset()
         
     | 
| 
       36 
36 
     | 
    
         
             
                end
         
     | 
| 
       37 
37 
     | 
    
         | 
    
        data/lib/rchardet/euctwfreq.rb
    CHANGED
    
    | 
         @@ -426,5 +426,6 @@ EUCTWCharToFreqOrder = [ 
     | 
|
| 
       426 
426 
     | 
    
         
             
            8678,8679,8680,8681,8682,8683,8684,8685,8686,8687,8688,8689,8690,8691,8692,8693, # 8694
         
     | 
| 
       427 
427 
     | 
    
         
             
            8694,8695,8696,8697,8698,8699,8700,8701,8702,8703,8704,8705,8706,8707,8708,8709, # 8710
         
     | 
| 
       428 
428 
     | 
    
         
             
            8710,8711,8712,8713,8714,8715,8716,8717,8718,8719,8720,8721,8722,8723,8724,8725, # 8726
         
     | 
| 
       429 
     | 
    
         
            -
            8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741 
     | 
| 
      
 429 
     | 
    
         
            +
            8726,8727,8728,8729,8730,8731,8732,8733,8734,8735,8736,8737,8738,8739,8740,8741  # 8742
         
     | 
| 
      
 430 
     | 
    
         
            +
            ].freeze
         
     | 
| 
       430 
431 
     | 
    
         
             
            end
         
     | 
    
        data/lib/rchardet/euctwprober.rb
    CHANGED
    
    | 
         @@ -30,8 +30,8 @@ module CharDet 
     | 
|
| 
       30 
30 
     | 
    
         
             
              class EUCTWProber < MultiByteCharSetProber
         
     | 
| 
       31 
31 
     | 
    
         
             
                def initialize
         
     | 
| 
       32 
32 
     | 
    
         
             
                  super()
         
     | 
| 
       33 
     | 
    
         
            -
                  @ 
     | 
| 
       34 
     | 
    
         
            -
                  @ 
     | 
| 
      
 33 
     | 
    
         
            +
                  @codingSM = CodingStateMachine.new(EUCTWSMModel)
         
     | 
| 
      
 34 
     | 
    
         
            +
                  @distributionAnalyzer = EUCTWDistributionAnalysis.new()
         
     | 
| 
       35 
35 
     | 
    
         
             
                  reset()
         
     | 
| 
       36 
36 
     | 
    
         
             
                end
         
     | 
| 
       37 
37 
     | 
    
         | 
    
        data/lib/rchardet/gb2312freq.rb
    CHANGED
    
    | 
         @@ -469,6 +469,6 @@ GB2312CharToFreqOrder = [ 
     | 
|
| 
       469 
469 
     | 
    
         
             
            6271,3875,5768,6094,5034,5506,4376,5769,6761,2120,6476,5253,5770,6762,5771,5970,
         
     | 
| 
       470 
470 
     | 
    
         
             
            3990,5971,5557,5558,5772,6477,6095,2787,4641,5972,5121,6096,6097,6272,6763,3703,
         
     | 
| 
       471 
471 
     | 
    
         
             
            5867,5507,6273,4206,6274,4789,6098,6764,3619,3646,3833,3804,2394,3788,4936,3978,
         
     | 
| 
       472 
     | 
    
         
            -
            4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767 
     | 
| 
       473 
     | 
    
         
            -
             
     | 
| 
      
 472 
     | 
    
         
            +
            4866,4899,6099,6100,5559,6478,6765,3599,5868,6101,5869,5870,6275,6766,4527,6767
         
     | 
| 
      
 473 
     | 
    
         
            +
            ].freeze
         
     | 
| 
       474 
474 
     | 
    
         
             
            end
         
     | 
| 
         @@ -30,8 +30,8 @@ module CharDet 
     | 
|
| 
       30 
30 
     | 
    
         
             
              class GB2312Prober < MultiByteCharSetProber
         
     | 
| 
       31 
31 
     | 
    
         
             
                def initialize
         
     | 
| 
       32 
32 
     | 
    
         
             
                  super
         
     | 
| 
       33 
     | 
    
         
            -
                  @ 
     | 
| 
       34 
     | 
    
         
            -
                  @ 
     | 
| 
      
 33 
     | 
    
         
            +
                  @codingSM = CodingStateMachine.new(GB2312SMModel)
         
     | 
| 
      
 34 
     | 
    
         
            +
                  @distributionAnalyzer = GB2312DistributionAnalysis.new()
         
     | 
| 
       35 
35 
     | 
    
         
             
                  reset()
         
     | 
| 
       36 
36 
     | 
    
         
             
                end
         
     | 
| 
       37 
37 
     | 
    
         | 
| 
         @@ -150,25 +150,25 @@ module CharDet 
     | 
|
| 
       150 
150 
     | 
    
         
             
              class HebrewProber < CharSetProber
         
     | 
| 
       151 
151 
     | 
    
         
             
                def initialize
         
     | 
| 
       152 
152 
     | 
    
         
             
                  super()
         
     | 
| 
       153 
     | 
    
         
            -
                  @ 
     | 
| 
       154 
     | 
    
         
            -
                  @ 
     | 
| 
      
 153 
     | 
    
         
            +
                  @logicalProber = nil
         
     | 
| 
      
 154 
     | 
    
         
            +
                  @visualProber = nil
         
     | 
| 
       155 
155 
     | 
    
         
             
                  reset()
         
     | 
| 
       156 
156 
     | 
    
         
             
                end
         
     | 
| 
       157 
157 
     | 
    
         | 
| 
       158 
158 
     | 
    
         
             
                def reset
         
     | 
| 
       159 
     | 
    
         
            -
                  @ 
     | 
| 
       160 
     | 
    
         
            -
                  @ 
     | 
| 
      
 159 
     | 
    
         
            +
                  @finalCharLogicalScore = 0
         
     | 
| 
      
 160 
     | 
    
         
            +
                  @finalCharVisualScore = 0
         
     | 
| 
       161 
161 
     | 
    
         
             
                  # The two last characters seen in the previous buffer,
         
     | 
| 
       162 
162 
     | 
    
         
             
                  # mPrev and mBeforePrev are initialized to space in order to simulate a word 
         
     | 
| 
       163 
163 
     | 
    
         
             
                  # delimiter at the beginning of the data
         
     | 
| 
       164 
     | 
    
         
            -
                  @ 
     | 
| 
       165 
     | 
    
         
            -
                  @ 
     | 
| 
      
 164 
     | 
    
         
            +
                  @prev = ' '
         
     | 
| 
      
 165 
     | 
    
         
            +
                  @beforePrev = ' '
         
     | 
| 
       166 
166 
     | 
    
         
             
                  # These probers are owned by the group prober.
         
     | 
| 
       167 
167 
     | 
    
         
             
                end
         
     | 
| 
       168 
168 
     | 
    
         | 
| 
       169 
169 
     | 
    
         
             
                def set_model_probers(logicalProber, visualProber)
         
     | 
| 
       170 
     | 
    
         
            -
                  @ 
     | 
| 
       171 
     | 
    
         
            -
                  @ 
     | 
| 
      
 170 
     | 
    
         
            +
                  @logicalProber = logicalProber
         
     | 
| 
      
 171 
     | 
    
         
            +
                  @visualProber = visualProber
         
     | 
| 
       172 
172 
     | 
    
         
             
                end
         
     | 
| 
       173 
173 
     | 
    
         | 
| 
       174 
174 
     | 
    
         
             
                def is_final(c)
         
     | 
| 
         @@ -215,34 +215,34 @@ module CharDet 
     | 
|
| 
       215 
215 
     | 
    
         
             
                  # so the word boundary detection works properly. [MAP]
         
     | 
| 
       216 
216 
     | 
    
         | 
| 
       217 
217 
     | 
    
         
             
                  if get_state() == ENotMe
         
     | 
| 
       218 
     | 
    
         
            -
             
     | 
| 
       219 
     | 
    
         
            -
             
     | 
| 
      
 218 
     | 
    
         
            +
                    # Both model probers say it's not them. No reason to continue.
         
     | 
| 
      
 219 
     | 
    
         
            +
                    return ENotMe
         
     | 
| 
       220 
220 
     | 
    
         
             
                  end
         
     | 
| 
       221 
221 
     | 
    
         | 
| 
       222 
222 
     | 
    
         
             
                  aBuf = filter_high_bit_only(aBuf)
         
     | 
| 
       223 
223 
     | 
    
         | 
| 
       224 
224 
     | 
    
         
             
                  for cur in aBuf.split(' ')
         
     | 
| 
       225 
     | 
    
         
            -
             
     | 
| 
       226 
     | 
    
         
            -
             
     | 
| 
       227 
     | 
    
         
            -
             
     | 
| 
       228 
     | 
    
         
            -
             
     | 
| 
       229 
     | 
    
         
            -
             
     | 
| 
       230 
     | 
    
         
            -
             
     | 
| 
       231 
     | 
    
         
            -
             
     | 
| 
       232 
     | 
    
         
            -
             
     | 
| 
       233 
     | 
    
         
            -
             
     | 
| 
       234 
     | 
    
         
            -
             
     | 
| 
       235 
     | 
    
         
            -
             
     | 
| 
       236 
     | 
    
         
            -
             
     | 
| 
       237 
     | 
    
         
            -
             
     | 
| 
       238 
     | 
    
         
            -
             
     | 
| 
       239 
     | 
    
         
            -
             
     | 
| 
       240 
     | 
    
         
            -
             
     | 
| 
       241 
     | 
    
         
            -
             
     | 
| 
       242 
     | 
    
         
            -
             
     | 
| 
       243 
     | 
    
         
            -
             
     | 
| 
       244 
     | 
    
         
            -
             
     | 
| 
       245 
     | 
    
         
            -
             
     | 
| 
      
 225 
     | 
    
         
            +
                    if cur == ' '
         
     | 
| 
      
 226 
     | 
    
         
            +
                      # We stand on a space - a word just ended
         
     | 
| 
      
 227 
     | 
    
         
            +
                      if @beforePrev != ' '
         
     | 
| 
      
 228 
     | 
    
         
            +
                        # next-to-last char was not a space so self._mPrev is not a 1 letter word
         
     | 
| 
      
 229 
     | 
    
         
            +
                        if is_final(@prev)
         
     | 
| 
      
 230 
     | 
    
         
            +
                          # case (1) [-2:not space][-1:final letter][cur:space]
         
     | 
| 
      
 231 
     | 
    
         
            +
                          @finalCharLogicalScore += 1
         
     | 
| 
      
 232 
     | 
    
         
            +
                        elsif is_non_final(@prev)
         
     | 
| 
      
 233 
     | 
    
         
            +
                          # case (2) [-2:not space][-1:Non-Final letter][cur:space]
         
     | 
| 
      
 234 
     | 
    
         
            +
                          @finalCharVisualScore += 1
         
     | 
| 
      
 235 
     | 
    
         
            +
                        end
         
     | 
| 
      
 236 
     | 
    
         
            +
                      end
         
     | 
| 
      
 237 
     | 
    
         
            +
                    else
         
     | 
| 
      
 238 
     | 
    
         
            +
                      # Not standing on a space
         
     | 
| 
      
 239 
     | 
    
         
            +
                      if (@beforePrev == ' ') and (is_final(@prev)) and (cur != ' ')
         
     | 
| 
      
 240 
     | 
    
         
            +
                        # case (3) [-2:space][-1:final letter][cur:not space]
         
     | 
| 
      
 241 
     | 
    
         
            +
                        @finalCharVisualScore += 1
         
     | 
| 
      
 242 
     | 
    
         
            +
                      end
         
     | 
| 
      
 243 
     | 
    
         
            +
                    end
         
     | 
| 
      
 244 
     | 
    
         
            +
                    @beforePrev = @prev
         
     | 
| 
      
 245 
     | 
    
         
            +
                    @prev = cur
         
     | 
| 
       246 
246 
     | 
    
         
             
                  end
         
     | 
| 
       247 
247 
     | 
    
         | 
| 
       248 
248 
     | 
    
         
             
                  # Forever detecting, till the end or until both model probers return eNotMe (handled above)
         
     | 
| 
         @@ -252,26 +252,26 @@ module CharDet 
     | 
|
| 
       252 
252 
     | 
    
         
             
                def get_charset_name
         
     | 
| 
       253 
253 
     | 
    
         
             
                  # Make the decision: is it Logical or Visual?
         
     | 
| 
       254 
254 
     | 
    
         
             
                  # If the final letter score distance is dominant enough, rely on it.
         
     | 
| 
       255 
     | 
    
         
            -
                  finalsub = @ 
     | 
| 
      
 255 
     | 
    
         
            +
                  finalsub = @finalCharLogicalScore - @finalCharVisualScore
         
     | 
| 
       256 
256 
     | 
    
         
             
                  if finalsub >= MIN_FINAL_CHAR_DISTANCE
         
     | 
| 
       257 
     | 
    
         
            -
             
     | 
| 
      
 257 
     | 
    
         
            +
                    return LOGICAL_HEBREW_NAME
         
     | 
| 
       258 
258 
     | 
    
         
             
                  end
         
     | 
| 
       259 
259 
     | 
    
         
             
                  if finalsub <= -MIN_FINAL_CHAR_DISTANCE
         
     | 
| 
       260 
     | 
    
         
            -
             
     | 
| 
      
 260 
     | 
    
         
            +
                    return VISUAL_HEBREW_NAME
         
     | 
| 
       261 
261 
     | 
    
         
             
                  end
         
     | 
| 
       262 
262 
     | 
    
         | 
| 
       263 
263 
     | 
    
         
             
                  # It's not dominant enough, try to rely on the model scores instead.
         
     | 
| 
       264 
     | 
    
         
            -
                  modelsub = @ 
     | 
| 
      
 264 
     | 
    
         
            +
                  modelsub = @logicalProber.get_confidence() - @visualProber.get_confidence()
         
     | 
| 
       265 
265 
     | 
    
         
             
                  if modelsub > MIN_MODEL_DISTANCE
         
     | 
| 
       266 
     | 
    
         
            -
             
     | 
| 
      
 266 
     | 
    
         
            +
                    return LOGICAL_HEBREW_NAME
         
     | 
| 
       267 
267 
     | 
    
         
             
                  end
         
     | 
| 
       268 
268 
     | 
    
         
             
                  if modelsub < -MIN_MODEL_DISTANCE
         
     | 
| 
       269 
     | 
    
         
            -
             
     | 
| 
      
 269 
     | 
    
         
            +
                    return VISUAL_HEBREW_NAME
         
     | 
| 
       270 
270 
     | 
    
         
             
                  end
         
     | 
| 
       271 
271 
     | 
    
         | 
| 
       272 
272 
     | 
    
         
             
                  # Still no good, back to final letter distance, maybe it'll save the day.
         
     | 
| 
       273 
273 
     | 
    
         
             
                  if finalsub < 0.0
         
     | 
| 
       274 
     | 
    
         
            -
             
     | 
| 
      
 274 
     | 
    
         
            +
                    return VISUAL_HEBREW_NAME
         
     | 
| 
       275 
275 
     | 
    
         
             
                  end
         
     | 
| 
       276 
276 
     | 
    
         | 
| 
       277 
277 
     | 
    
         
             
                  # (finalsub > 0 - Logical) or (don't know what to do) default to Logical.
         
     | 
| 
         @@ -280,8 +280,8 @@ module CharDet 
     | 
|
| 
       280 
280 
     | 
    
         | 
| 
       281 
281 
     | 
    
         
             
                def get_state
         
     | 
| 
       282 
282 
     | 
    
         
             
                  # Remain active as long as any of the model probers are active.
         
     | 
| 
       283 
     | 
    
         
            -
                  if (@ 
     | 
| 
       284 
     | 
    
         
            -
             
     | 
| 
      
 283 
     | 
    
         
            +
                  if (@logicalProber.get_state() == ENotMe) and (@visualProber.get_state() == ENotMe)
         
     | 
| 
      
 284 
     | 
    
         
            +
                    return ENotMe
         
     | 
| 
       285 
285 
     | 
    
         
             
                  end
         
     | 
| 
       286 
286 
     | 
    
         
             
                  return EDetecting
         
     | 
| 
       287 
287 
     | 
    
         
             
                end
         
     |