chardet 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +504 -0
- data/README +12 -0
- data/lib/Big5Freq.rb +913 -0
- data/lib/Big5Prober.rb +48 -0
- data/lib/CharDistributionAnalysis.rb +245 -0
- data/lib/CharSetGroupProber.rb +114 -0
- data/lib/CharSetProber.rb +70 -0
- data/lib/CodingStateMachine.rb +74 -0
- data/lib/ESCSM.rb +242 -0
- data/lib/EUCJPProber.rb +97 -0
- data/lib/EUCKRFreq.rb +600 -0
- data/lib/EUCKRProber.rb +48 -0
- data/lib/EUCTWFreq.rb +432 -0
- data/lib/EUCTWProber.rb +48 -0
- data/lib/EscCharSetProber.rb +94 -0
- data/lib/GB2312Freq.rb +475 -0
- data/lib/GB2312Prober.rb +48 -0
- data/lib/HebrewProber.rb +292 -0
- data/lib/JISFreq.rb +573 -0
- data/lib/JapaneseContextAnalysis.rb +234 -0
- data/lib/LangBulgarianModel.rb +231 -0
- data/lib/LangCyrillicModel.rb +332 -0
- data/lib/LangGreekModel.rb +229 -0
- data/lib/LangHebrewModel.rb +202 -0
- data/lib/LangHungarianModel.rb +228 -0
- data/lib/LangThaiModel.rb +203 -0
- data/lib/Latin1Prober.rb +160 -0
- data/lib/MBCSGroupProber.rb +57 -0
- data/lib/MBCSSM.rb +513 -0
- data/lib/MultiByteCharSetProber.rb +94 -0
- data/lib/SBCSGroupProber.rb +71 -0
- data/lib/SJISProber.rb +99 -0
- data/lib/SingleByteCharSetProber.rb +131 -0
- data/lib/UTF8Prober.rb +91 -0
- data/lib/UniversalDetector.rb +209 -0
- data/python-docs/css/chardet.css +299 -0
- data/python-docs/faq.html +107 -0
- data/python-docs/how-it-works.html +113 -0
- data/python-docs/images/caution.png +0 -0
- data/python-docs/images/important.png +0 -0
- data/python-docs/images/note.png +0 -0
- data/python-docs/images/permalink.gif +0 -0
- data/python-docs/images/tip.png +0 -0
- data/python-docs/images/warning.png +0 -0
- data/python-docs/index.html +73 -0
- data/python-docs/license.html +62 -0
- data/python-docs/supported-encodings.html +86 -0
- data/python-docs/usage.html +107 -0
- metadata +86 -0
| @@ -0,0 +1,202 @@ | |
| 1 | 
            +
            ######################## BEGIN LICENSE BLOCK ########################
         | 
| 2 | 
            +
            # The Original Code is mozilla.org code.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            # The Initial Developer of the Original Code is
         | 
| 5 | 
            +
            # Netscape Communications Corporation.
         | 
| 6 | 
            +
            # Portions created by the Initial Developer are Copyright (C) 1998
         | 
| 7 | 
            +
            # the Initial Developer. All Rights Reserved.
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            # Contributor(s):
         | 
| 10 | 
            +
            #   Hui (zhengzhengzheng@gmail.com) - port to Ruby
         | 
| 11 | 
            +
            #   Mark Pilgrim - first port to Python
         | 
| 12 | 
            +
            #
         | 
| 13 | 
            +
            # This library is free software; you can redistribute it and/or
         | 
| 14 | 
            +
            # modify it under the terms of the GNU Lesser General Public
         | 
| 15 | 
            +
            # License as published by the Free Software Foundation; either
         | 
| 16 | 
            +
            # version 2.1 of the License, or (at your option) any later version.
         | 
| 17 | 
            +
            # 
         | 
| 18 | 
            +
            # This library is distributed in the hope that it will be useful,
         | 
| 19 | 
            +
            # but WITHOUT ANY WARRANTY; without even the implied warranty of
         | 
| 20 | 
            +
            # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
         | 
| 21 | 
            +
            # Lesser General Public License for more details.
         | 
| 22 | 
            +
            # 
         | 
| 23 | 
            +
            # You should have received a copy of the GNU Lesser General Public
         | 
| 24 | 
            +
            # License along with this library; if not, write to the Free Software
         | 
| 25 | 
            +
            # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
         | 
| 26 | 
            +
            # 02110-1301  USA
         | 
| 27 | 
            +
            ######################### END LICENSE BLOCK #########################
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            require 'UniversalDetector'
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            module UniversalDetector
         | 
| 32 | 
            +
                # 255 => Control characters that usually do not exist in any text
         | 
| 33 | 
            +
                # 254 => Line feed / carriage return (0x0A, 0x0D)
         | 
| 34 | 
            +
                # 253 => Symbols (punctuation) that do not belong to a word
         | 
| 35 | 
            +
                # 252 => 0 - 9
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                # Windows-1255 language model
         | 
| 38 | 
            +
                # Character Mapping Table =>
         | 
| 39 | 
            +
                Win1255_CharToOrderMap = [ \
         | 
| 40 | 
            +
                255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
         | 
| 41 | 
            +
                255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
         | 
| 42 | 
            +
                253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
         | 
| 43 | 
            +
                252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
         | 
| 44 | 
            +
                253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85,  # 40
         | 
| 45 | 
            +
                 78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253,  # 50
         | 
| 46 | 
            +
                253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49,  # 60
         | 
| 47 | 
            +
                 66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253,  # 70
         | 
| 48 | 
            +
                124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214,
         | 
| 49 | 
            +
                215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221,
         | 
| 50 | 
            +
                 34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227,
         | 
| 51 | 
            +
                106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234,
         | 
| 52 | 
            +
                 30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237,
         | 
| 53 | 
            +
                238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250,
         | 
| 54 | 
            +
                  9,  8, 20, 16,  3,  2, 24, 14, 22,  1, 25, 15,  4, 11,  6, 23,
         | 
| 55 | 
            +
                 12, 19, 13, 26, 18, 27, 21, 17,  7, 10,  5,251,252,128, 96,253,
         | 
| 56 | 
            +
                ]
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                # Model Table => 
         | 
| 59 | 
            +
                # total sequences => 100%
         | 
| 60 | 
            +
                # first 512 sequences => 98.4004%
         | 
| 61 | 
            +
                # first 1024 sequences => 1.5981%
         | 
| 62 | 
            +
                # rest  sequences =>      0.087%
         | 
| 63 | 
            +
                # negative sequences =>   0.0015% 
         | 
| 64 | 
            +
                HebrewLangModel = [ \
         | 
| 65 | 
            +
                0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
         | 
| 66 | 
            +
                3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
         | 
| 67 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
         | 
| 68 | 
            +
                1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
         | 
| 69 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
         | 
| 70 | 
            +
                1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
         | 
| 71 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
         | 
| 72 | 
            +
                1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
         | 
| 73 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
         | 
| 74 | 
            +
                1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
         | 
| 75 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
         | 
| 76 | 
            +
                1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 77 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
         | 
| 78 | 
            +
                0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
         | 
| 79 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
         | 
| 80 | 
            +
                0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
         | 
| 81 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
         | 
| 82 | 
            +
                1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
         | 
| 83 | 
            +
                3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
         | 
| 84 | 
            +
                0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
         | 
| 85 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
         | 
| 86 | 
            +
                0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 87 | 
            +
                3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
         | 
| 88 | 
            +
                0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 89 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
         | 
| 90 | 
            +
                0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 91 | 
            +
                3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
         | 
| 92 | 
            +
                0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
         | 
| 93 | 
            +
                3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
         | 
| 94 | 
            +
                0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
         | 
| 95 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
         | 
| 96 | 
            +
                0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 97 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
         | 
| 98 | 
            +
                0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 99 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
         | 
| 100 | 
            +
                0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
         | 
| 101 | 
            +
                3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
         | 
| 102 | 
            +
                0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
         | 
| 103 | 
            +
                3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
         | 
| 104 | 
            +
                0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
         | 
| 105 | 
            +
                3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
         | 
| 106 | 
            +
                0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
         | 
| 107 | 
            +
                3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
         | 
| 108 | 
            +
                0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
         | 
| 109 | 
            +
                3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
         | 
| 110 | 
            +
                0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 111 | 
            +
                3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
         | 
| 112 | 
            +
                1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 113 | 
            +
                3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
         | 
| 114 | 
            +
                0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
         | 
| 115 | 
            +
                3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
         | 
| 116 | 
            +
                0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 117 | 
            +
                3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
         | 
| 118 | 
            +
                0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 119 | 
            +
                3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 120 | 
            +
                0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
         | 
| 121 | 
            +
                3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
         | 
| 122 | 
            +
                0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 123 | 
            +
                2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
         | 
| 124 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 125 | 
            +
                2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
         | 
| 126 | 
            +
                0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 127 | 
            +
                2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
         | 
| 128 | 
            +
                0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 129 | 
            +
                3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
         | 
| 130 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
         | 
| 131 | 
            +
                0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
         | 
| 132 | 
            +
                0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 133 | 
            +
                3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
         | 
| 134 | 
            +
                0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
         | 
| 135 | 
            +
                2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
         | 
| 136 | 
            +
                0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 137 | 
            +
                1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
         | 
| 138 | 
            +
                0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 139 | 
            +
                0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
         | 
| 140 | 
            +
                0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 141 | 
            +
                2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
         | 
| 142 | 
            +
                0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 143 | 
            +
                3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
         | 
| 144 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 145 | 
            +
                2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 146 | 
            +
                0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
         | 
| 147 | 
            +
                0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
         | 
| 148 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 149 | 
            +
                2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 150 | 
            +
                1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
         | 
| 151 | 
            +
                2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 152 | 
            +
                0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
         | 
| 153 | 
            +
                2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 154 | 
            +
                0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
         | 
| 155 | 
            +
                1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
         | 
| 156 | 
            +
                0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 157 | 
            +
                2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
         | 
| 158 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 159 | 
            +
                1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
         | 
| 160 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
         | 
| 161 | 
            +
                2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 162 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 163 | 
            +
                1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 164 | 
            +
                0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
         | 
| 165 | 
            +
                1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 166 | 
            +
                0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
         | 
| 167 | 
            +
                1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 168 | 
            +
                0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
         | 
| 169 | 
            +
                2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 170 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
         | 
| 171 | 
            +
                1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 172 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
         | 
| 173 | 
            +
                1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 174 | 
            +
                0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
         | 
| 175 | 
            +
                2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
         | 
| 176 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 177 | 
            +
                2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 178 | 
            +
                0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
         | 
| 179 | 
            +
                2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 180 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 181 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 182 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
         | 
| 183 | 
            +
                0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
         | 
| 184 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 185 | 
            +
                1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 186 | 
            +
                0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
         | 
| 187 | 
            +
                1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 188 | 
            +
                1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
         | 
| 189 | 
            +
                0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
         | 
| 190 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 191 | 
            +
                1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 192 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
         | 
| 193 | 
            +
                ]
         | 
| 194 | 
            +
             | 
| 195 | 
            +
                # Model descriptor for windows-1255 Hebrew: bundles Win1255_CharToOrderMap
                # and HebrewLangModel (both defined earlier in this module) with the
                # charset name reported for this model.
                Win1255HebrewModel = { \
         | 
| 196 | 
            +
                  'charToOrderMap' => Win1255_CharToOrderMap,
         | 
| 197 | 
            +
                  'precedenceMatrix' => HebrewLangModel,
         | 
| 198 | 
            +
                  'mTypicalPositiveRatio' => 0.984004,  # same figure as the 98.4004% share listed for the first 512 sequences
         | 
| 199 | 
            +
                  'keepEnglishLetter' => false,  # NOTE(review): consumer is not visible here; presumably read by the single-byte prober - confirm
         | 
| 200 | 
            +
                  'charsetName' => "windows-1255"
         | 
| 201 | 
            +
                }
         | 
| 202 | 
            +
            end
         | 
| @@ -0,0 +1,228 @@ | |
| 1 | 
            +
            ######################## BEGIN LICENSE BLOCK ########################
         | 
| 2 | 
            +
            # The Original Code is mozilla.org code.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            # The Initial Developer of the Original Code is
         | 
| 5 | 
            +
            # Netscape Communications Corporation.
         | 
| 6 | 
            +
            # Portions created by the Initial Developer are Copyright (C) 1998
         | 
| 7 | 
            +
            # the Initial Developer. All Rights Reserved.
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            # Contributor(s):
         | 
| 10 | 
            +
            #   Hui (zhengzhengzheng@gmail.com) - port to Ruby
         | 
| 11 | 
            +
            #   Mark Pilgrim - first port to Python
         | 
| 12 | 
            +
            #
         | 
| 13 | 
            +
            # This library is free software; you can redistribute it and/or
         | 
| 14 | 
            +
            # modify it under the terms of the GNU Lesser General Public
         | 
| 15 | 
            +
            # License as published by the Free Software Foundation; either
         | 
| 16 | 
            +
            # version 2.1 of the License, or (at your option) any later version.
         | 
| 17 | 
            +
            # 
         | 
| 18 | 
            +
            # This library is distributed in the hope that it will be useful,
         | 
| 19 | 
            +
            # but WITHOUT ANY WARRANTY; without even the implied warranty of
         | 
| 20 | 
            +
            # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
         | 
| 21 | 
            +
            # Lesser General Public License for more details.
         | 
| 22 | 
            +
            # 
         | 
| 23 | 
            +
            # You should have received a copy of the GNU Lesser General Public
         | 
| 24 | 
            +
            # License along with this library; if not, write to the Free Software
         | 
| 25 | 
            +
            # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
         | 
| 26 | 
            +
            # 02110-1301  USA
         | 
| 27 | 
            +
            ######################### END LICENSE BLOCK #########################
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            require 'UniversalDetector'
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            module UniversalDetector
         | 
| 32 | 
            +
                # 255 => Control characters that usually do not exist in any text
         | 
| 33 | 
            +
                # 254 => Line feed / carriage return (0x0A, 0x0D)
         | 
| 34 | 
            +
                # 253 => Symbols (punctuation) that do not belong to a word
         | 
| 35 | 
            +
                # 252 => 0 - 9
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                # Character Mapping Table =>
         | 
| 38 | 
            +
                Latin2_HungarianCharToOrderMap = [ \
         | 
| 39 | 
            +
                255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
         | 
| 40 | 
            +
                255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
         | 
| 41 | 
            +
                253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
         | 
| 42 | 
            +
                252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
         | 
| 43 | 
            +
                253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
         | 
| 44 | 
            +
                 46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
         | 
| 45 | 
            +
                253,  2, 18, 26, 17,  1, 27, 12, 20,  9, 22,  7,  6, 13,  4,  8,
         | 
| 46 | 
            +
                 23, 67, 10,  5,  3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
         | 
| 47 | 
            +
                159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
         | 
| 48 | 
            +
                175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
         | 
| 49 | 
            +
                191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
         | 
| 50 | 
            +
                 79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
         | 
| 51 | 
            +
                221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
         | 
| 52 | 
            +
                232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
         | 
| 53 | 
            +
                 82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
         | 
| 54 | 
            +
                245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
         | 
| 55 | 
            +
                ]
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                Win1250HungarianCharToOrderMap = [ \
         | 
| 58 | 
            +
                255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
         | 
| 59 | 
            +
                255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
         | 
| 60 | 
            +
                253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
         | 
| 61 | 
            +
                252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
         | 
| 62 | 
            +
                253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
         | 
| 63 | 
            +
                 46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
         | 
| 64 | 
            +
                253,  2, 18, 26, 17,  1, 27, 12, 20,  9, 22,  7,  6, 13,  4,  8,
         | 
| 65 | 
            +
                 23, 67, 10,  5,  3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
         | 
| 66 | 
            +
                161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
         | 
| 67 | 
            +
                177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
         | 
| 68 | 
            +
                191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
         | 
| 69 | 
            +
                 81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
         | 
| 70 | 
            +
                221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
         | 
| 71 | 
            +
                232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
         | 
| 72 | 
            +
                 84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
         | 
| 73 | 
            +
                245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
         | 
| 74 | 
            +
                ]
         | 
| 75 | 
            +
             | 
| 76 | 
            +
                # Model Table => 
         | 
| 77 | 
            +
                # total sequences => 100%
         | 
| 78 | 
            +
                # first 512 sequences => 94.7368%
         | 
| 79 | 
            +
                # first 1024 sequences => 5.2623%
         | 
| 80 | 
            +
                # rest  sequences =>     0.8894%
         | 
| 81 | 
            +
                # negative sequences =>  0.0009% 
         | 
| 82 | 
            +
                HungarianLangModel = [ \
         | 
| 83 | 
            +
                0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
         | 
| 84 | 
            +
                3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
         | 
| 85 | 
            +
                3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
         | 
| 86 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
         | 
| 87 | 
            +
                3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
         | 
| 88 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 89 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
         | 
| 90 | 
            +
                0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
         | 
| 91 | 
            +
                3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
         | 
| 92 | 
            +
                0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
         | 
| 93 | 
            +
                3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
         | 
| 94 | 
            +
                0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 95 | 
            +
                3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
         | 
| 96 | 
            +
                0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 97 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
         | 
| 98 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 99 | 
            +
                3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
         | 
| 100 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 101 | 
            +
                3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
         | 
| 102 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
         | 
| 103 | 
            +
                3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
         | 
| 104 | 
            +
                0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
         | 
| 105 | 
            +
                3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
         | 
| 106 | 
            +
                0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 107 | 
            +
                3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
         | 
| 108 | 
            +
                0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 109 | 
            +
                3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
         | 
| 110 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
         | 
| 111 | 
            +
                3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
         | 
| 112 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 113 | 
            +
                3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
         | 
| 114 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 115 | 
            +
                3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
         | 
| 116 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
         | 
| 117 | 
            +
                3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
         | 
| 118 | 
            +
                0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 119 | 
            +
                3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
         | 
| 120 | 
            +
                0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 121 | 
            +
                3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
         | 
| 122 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 123 | 
            +
                3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
         | 
| 124 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 125 | 
            +
                3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
         | 
| 126 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 127 | 
            +
                3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
         | 
| 128 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 129 | 
            +
                3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
         | 
| 130 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 131 | 
            +
                1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
         | 
| 132 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 133 | 
            +
                3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
         | 
| 134 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 135 | 
            +
                3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
         | 
| 136 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
         | 
| 137 | 
            +
                2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
         | 
| 138 | 
            +
                0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
         | 
| 139 | 
            +
                3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
         | 
| 140 | 
            +
                1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
         | 
| 141 | 
            +
                1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
         | 
| 142 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 143 | 
            +
                1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
         | 
| 144 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 145 | 
            +
                3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
         | 
| 146 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 147 | 
            +
                3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
         | 
| 148 | 
            +
                1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
         | 
| 149 | 
            +
                3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
         | 
| 150 | 
            +
                2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
         | 
| 151 | 
            +
                2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
         | 
| 152 | 
            +
                2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
         | 
| 153 | 
            +
                2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
         | 
| 154 | 
            +
                2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
         | 
| 155 | 
            +
                3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
         | 
| 156 | 
            +
                2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
         | 
| 157 | 
            +
                2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
         | 
| 158 | 
            +
                2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
         | 
| 159 | 
            +
                1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
         | 
| 160 | 
            +
                1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
         | 
| 161 | 
            +
                3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
         | 
| 162 | 
            +
                1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
         | 
| 163 | 
            +
                1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
         | 
| 164 | 
            +
                2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
         | 
| 165 | 
            +
                2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
         | 
| 166 | 
            +
                2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
         | 
| 167 | 
            +
                3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
         | 
| 168 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 169 | 
            +
                2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
         | 
| 170 | 
            +
                2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
         | 
| 171 | 
            +
                1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
         | 
| 172 | 
            +
                1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
         | 
| 173 | 
            +
                2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
         | 
| 174 | 
            +
                2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
         | 
| 175 | 
            +
                1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
         | 
| 176 | 
            +
                1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
         | 
| 177 | 
            +
                2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
         | 
| 178 | 
            +
                1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
         | 
| 179 | 
            +
                1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
         | 
| 180 | 
            +
                2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
         | 
| 181 | 
            +
                2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
         | 
| 182 | 
            +
                2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
         | 
| 183 | 
            +
                1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
         | 
| 184 | 
            +
                1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
         | 
| 185 | 
            +
                1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
         | 
| 186 | 
            +
                0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
         | 
| 187 | 
            +
                2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
         | 
| 188 | 
            +
                2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
         | 
| 189 | 
            +
                1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
         | 
| 190 | 
            +
                2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
         | 
| 191 | 
            +
                1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
         | 
| 192 | 
            +
                1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
         | 
| 193 | 
            +
                2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
         | 
| 194 | 
            +
                2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
         | 
| 195 | 
            +
                2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
         | 
| 196 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 197 | 
            +
                1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
         | 
| 198 | 
            +
                1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
         | 
| 199 | 
            +
                2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
         | 
| 200 | 
            +
                0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
         | 
| 201 | 
            +
                1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
         | 
| 202 | 
            +
                0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
         | 
| 203 | 
            +
                1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
         | 
| 204 | 
            +
                0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
         | 
| 205 | 
            +
                0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 206 | 
            +
                0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
         | 
| 207 | 
            +
                2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
         | 
| 208 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 209 | 
            +
                1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 210 | 
            +
                0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
         | 
| 211 | 
            +
                ]
         | 
| 212 | 
            +
             | 
| 213 | 
            +
                # Model descriptor for Hungarian text encoded as ISO-8859-2.
                # Bundles the Latin-2 char-to-order map with the HungarianLangModel
                # matrix defined above. The ratio 0.947368 is the "first 512
                # sequences => 94.7368%" figure from the model-table header.
                Latin2HungarianModel = {
                  'charToOrderMap'        => Latin2_HungarianCharToOrderMap,
                  'precedenceMatrix'      => HungarianLangModel,
                  'mTypicalPositiveRatio' => 0.947368,
                  'keepEnglishLetter'     => true,
                  'charsetName'           => 'ISO-8859-2'
                }
         | 
| 220 | 
            +
             | 
| 221 | 
            +
                # Model descriptor for Hungarian text encoded as windows-1250.
                # Bundles the windows-1250 char-to-order map with the
                # HungarianLangModel matrix defined above. The ratio 0.947368 is the
                # "first 512 sequences => 94.7368%" figure from the model-table header.
                Win1250HungarianModel = {
                  'charToOrderMap'        => Win1250HungarianCharToOrderMap,
                  'precedenceMatrix'      => HungarianLangModel,
                  'mTypicalPositiveRatio' => 0.947368,
                  'keepEnglishLetter'     => true,
                  'charsetName'           => 'windows-1250'
                }
         | 
| 228 | 
            +
            end
         | 
| @@ -0,0 +1,203 @@ | |
| 1 | 
            +
            ######################## BEGIN LICENSE BLOCK ########################
         | 
| 2 | 
            +
            # The Original Code is mozilla.org code.
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            # The Initial Developer of the Original Code is
         | 
| 5 | 
            +
            # Netscape Communications Corporation.
         | 
| 6 | 
            +
            # Portions created by the Initial Developer are Copyright (C) 1998
         | 
| 7 | 
            +
            # the Initial Developer. All Rights Reserved.
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            # Contributor(s):
         | 
| 10 | 
            +
            #   Hui (zhengzhengzheng@gmail.com) - port to Ruby
         | 
| 11 | 
            +
            #   Mark Pilgrim - first port to Python
         | 
| 12 | 
            +
            #
         | 
| 13 | 
            +
            # This library is free software; you can redistribute it and/or
         | 
| 14 | 
            +
            # modify it under the terms of the GNU Lesser General Public
         | 
| 15 | 
            +
            # License as published by the Free Software Foundation; either
         | 
| 16 | 
            +
            # version 2.1 of the License, or (at your option) any later version.
         | 
| 17 | 
            +
            # 
         | 
| 18 | 
            +
            # This library is distributed in the hope that it will be useful,
         | 
| 19 | 
            +
            # but WITHOUT ANY WARRANTY; without even the implied warranty of
         | 
| 20 | 
            +
            # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
         | 
| 21 | 
            +
            # Lesser General Public License for more details.
         | 
| 22 | 
            +
            # 
         | 
| 23 | 
            +
            # You should have received a copy of the GNU Lesser General Public
         | 
| 24 | 
            +
            # License along with this library; if not, write to the Free Software
         | 
| 25 | 
            +
            # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
         | 
| 26 | 
            +
            # 02110-1301  USA
         | 
| 27 | 
            +
            ######################### END LICENSE BLOCK #########################
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            require 'UniversalDetector'
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            module UniversalDetector
         | 
| 32 | 
            +
                # 255 => Control characters that usually do not exist in any text
         | 
| 33 | 
            +
                # 254 => Line feed / carriage return (bytes 0x0A and 0x0D)
         | 
| 34 | 
            +
                # 253 => Symbols (punctuation) that do not belong to a word
         | 
| 35 | 
            +
                # 252 => Digits 0 - 9
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                # The following results for Thai were collected from a limited sample [1M].
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                # Character Mapping Table =>
         | 
| 40 | 
            +
                TIS620CharToOrderMap = [ \
         | 
| 41 | 
            +
                255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255,  # 00
         | 
| 42 | 
            +
                255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,  # 10
         | 
| 43 | 
            +
                253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,  # 20
         | 
| 44 | 
            +
                252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253,  # 30
         | 
| 45 | 
            +
                253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111,  # 40
         | 
| 46 | 
            +
                188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253,  # 50
         | 
| 47 | 
            +
                253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82,  # 60
         | 
| 48 | 
            +
                 96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253,  # 70
         | 
| 49 | 
            +
                209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
         | 
| 50 | 
            +
                223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
         | 
| 51 | 
            +
                236,  5, 30,237, 24,238, 75,  8, 26, 52, 34, 51,119, 47, 58, 57,
         | 
| 52 | 
            +
                 49, 53, 55, 43, 20, 19, 44, 14, 48,  3, 17, 25, 39, 62, 31, 54,
         | 
| 53 | 
            +
                 45,  9, 16,  2, 61, 15,239, 12, 42, 46, 18, 21, 76,  4, 66, 63,
         | 
| 54 | 
            +
                 22, 10,  1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
         | 
| 55 | 
            +
                 11, 28, 41, 29, 33,245, 50, 37,  6,  7, 67, 77, 38, 93,246,247,
         | 
| 56 | 
            +
                 68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
         | 
| 57 | 
            +
                ]
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                # Model Table => 
         | 
| 60 | 
            +
                # total sequences => 100%
         | 
| 61 | 
            +
                # first 512 sequences => 92.6386%
         | 
| 62 | 
            +
                # first 1024 sequences => 7.3177%  (apparently ranks 513-1024 only: 92.6386 + 7.3177 + 0.0436 ~= 100)
         | 
| 63 | 
            +
                # rest  sequences =>     1.0230%
         | 
| 64 | 
            +
                # negative sequences =>  0.0436% 
         | 
| 65 | 
            +
                ThaiLangModel = [ \
         | 
| 66 | 
            +
                0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
         | 
| 67 | 
            +
                0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
         | 
| 68 | 
            +
                3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
         | 
| 69 | 
            +
                0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
         | 
| 70 | 
            +
                3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
         | 
| 71 | 
            +
                3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
         | 
| 72 | 
            +
                3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
         | 
| 73 | 
            +
                3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
         | 
| 74 | 
            +
                3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
         | 
| 75 | 
            +
                3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
         | 
| 76 | 
            +
                3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
         | 
| 77 | 
            +
                2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
         | 
| 78 | 
            +
                3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
         | 
| 79 | 
            +
                0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 80 | 
            +
                3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
         | 
| 81 | 
            +
                0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
         | 
| 82 | 
            +
                3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
         | 
| 83 | 
            +
                1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
         | 
| 84 | 
            +
                3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
         | 
| 85 | 
            +
                3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
         | 
| 86 | 
            +
                1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
         | 
| 87 | 
            +
                0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
         | 
| 88 | 
            +
                2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
         | 
| 89 | 
            +
                0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
         | 
| 90 | 
            +
                3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
         | 
| 91 | 
            +
                2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
         | 
| 92 | 
            +
                3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
         | 
| 93 | 
            +
                0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 94 | 
            +
                3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
         | 
| 95 | 
            +
                3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
         | 
| 96 | 
            +
                3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
         | 
| 97 | 
            +
                2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
         | 
| 98 | 
            +
                3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
         | 
| 99 | 
            +
                2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 100 | 
            +
                3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
         | 
| 101 | 
            +
                3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
         | 
| 102 | 
            +
                3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
         | 
| 103 | 
            +
                3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 104 | 
            +
                3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
         | 
| 105 | 
            +
                3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
         | 
| 106 | 
            +
                3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
         | 
| 107 | 
            +
                3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 108 | 
            +
                3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
         | 
| 109 | 
            +
                1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 110 | 
            +
                3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
         | 
| 111 | 
            +
                0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
         | 
| 112 | 
            +
                3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
         | 
| 113 | 
            +
                0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
         | 
| 114 | 
            +
                3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
         | 
| 115 | 
            +
                3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 116 | 
            +
                3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
         | 
| 117 | 
            +
                1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
         | 
| 118 | 
            +
                3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
         | 
| 119 | 
            +
                3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 120 | 
            +
                0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 121 | 
            +
                0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 122 | 
            +
                0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
         | 
| 123 | 
            +
                0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
         | 
| 124 | 
            +
                0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
         | 
| 125 | 
            +
                0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 126 | 
            +
                3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
         | 
| 127 | 
            +
                1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 128 | 
            +
                3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
         | 
| 129 | 
            +
                1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
         | 
| 130 | 
            +
                3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
         | 
| 131 | 
            +
                0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
         | 
| 132 | 
            +
                0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
         | 
| 133 | 
            +
                0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
         | 
| 134 | 
            +
                3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
         | 
| 135 | 
            +
                3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 136 | 
            +
                3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
         | 
| 137 | 
            +
                0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 138 | 
            +
                3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
         | 
| 139 | 
            +
                0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 140 | 
            +
                3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
         | 
| 141 | 
            +
                0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 142 | 
            +
                3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
         | 
| 143 | 
            +
                0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
         | 
| 144 | 
            +
                0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
         | 
| 145 | 
            +
                0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 146 | 
            +
                0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 147 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 148 | 
            +
                1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
         | 
| 149 | 
            +
                0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
         | 
| 150 | 
            +
                3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
         | 
| 151 | 
            +
                0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 152 | 
            +
                3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
         | 
| 153 | 
            +
                0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
         | 
| 154 | 
            +
                3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
         | 
| 155 | 
            +
                2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 156 | 
            +
                1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
         | 
| 157 | 
            +
                0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 158 | 
            +
                3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
         | 
| 159 | 
            +
                0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
         | 
| 160 | 
            +
                3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
         | 
| 161 | 
            +
                0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 162 | 
            +
                2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
         | 
| 163 | 
            +
                2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 164 | 
            +
                2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 165 | 
            +
                0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 166 | 
            +
                3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 167 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 168 | 
            +
                1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
         | 
| 169 | 
            +
                1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 170 | 
            +
                0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
         | 
| 171 | 
            +
                1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 172 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
         | 
| 173 | 
            +
                0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 174 | 
            +
                2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
         | 
| 175 | 
            +
                1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
         | 
| 176 | 
            +
                1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
         | 
| 177 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 178 | 
            +
                3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 179 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
         | 
| 180 | 
            +
                2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
         | 
| 181 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 182 | 
            +
                2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
         | 
| 183 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 184 | 
            +
                2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 185 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
         | 
| 186 | 
            +
                2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 187 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
         | 
| 188 | 
            +
                1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 189 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 190 | 
            +
                0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 191 | 
            +
                0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 192 | 
            +
                2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 193 | 
            +
                0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
         | 
| 194 | 
            +
                ]
         | 
| 195 | 
            +
             | 
| 196 | 
            +
                # Model descriptor for the TIS-620 charset, bundled as a plain Hash with
                # string keys:
                #   'charToOrderMap'        - the TIS620CharToOrderMap constant (defined
                #                             outside this excerpt).
                #   'precedenceMatrix'      - the ThaiLangModel constant (presumably the
                #                             numeric table that closes just above; confirm).
                #   'mTypicalPositiveRatio' - the float 0.926386.
                #   'keepEnglishLetter'     - false.
                #   'charsetName'           - the string "TIS-620".
                # NOTE(review): the trailing backslash after `{` looks like a leftover from
                # the Python original; Ruby does not need it to continue a hash literal.
                TIS620ThaiModel = { \
         | 
| 197 | 
            +
                  'charToOrderMap' => TIS620CharToOrderMap,
         | 
| 198 | 
            +
                  'precedenceMatrix' => ThaiLangModel,
         | 
| 199 | 
            +
                  'mTypicalPositiveRatio' => 0.926386,
         | 
| 200 | 
            +
                  'keepEnglishLetter' => false,
         | 
| 201 | 
            +
                  'charsetName' => "TIS-620"
         | 
| 202 | 
            +
                }
         | 
| 203 | 
            +
            end
         |