charguess 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (135) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +134 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +26 -0
  5. data/Rakefile +25 -0
  6. data/ext/charguess/charguess.c +29 -0
  7. data/ext/charguess/extconf.rb +11 -0
  8. data/ext/libcharguess/cpp/.deps/EUCJPProber.Plo +1 -0
  9. data/ext/libcharguess/cpp/.deps/EUCJPProber.Po +87 -0
  10. data/ext/libcharguess/cpp/.deps/EUCKRProber.Plo +1 -0
  11. data/ext/libcharguess/cpp/.deps/EUCKRProber.Po +85 -0
  12. data/ext/libcharguess/cpp/.deps/EUCTWProber.Plo +1 -0
  13. data/ext/libcharguess/cpp/.deps/EUCTWProber.Po +85 -0
  14. data/ext/libcharguess/cpp/.deps/EscCharsetProber.Plo +1 -0
  15. data/ext/libcharguess/cpp/.deps/EscCharsetProber.Po +83 -0
  16. data/ext/libcharguess/cpp/.deps/EscSM.Plo +1 -0
  17. data/ext/libcharguess/cpp/.deps/EscSM.Po +77 -0
  18. data/ext/libcharguess/cpp/.deps/GB2312Prober.Plo +1 -0
  19. data/ext/libcharguess/cpp/.deps/GB2312Prober.Po +85 -0
  20. data/ext/libcharguess/cpp/.deps/LangBulgarianModel.Plo +1 -0
  21. data/ext/libcharguess/cpp/.deps/LangBulgarianModel.Po +78 -0
  22. data/ext/libcharguess/cpp/.deps/LangCyrillicModel.Plo +1 -0
  23. data/ext/libcharguess/cpp/.deps/LangCyrillicModel.Po +78 -0
  24. data/ext/libcharguess/cpp/.deps/LangGreekModel.Plo +1 -0
  25. data/ext/libcharguess/cpp/.deps/LangGreekModel.Po +78 -0
  26. data/ext/libcharguess/cpp/.deps/LangHungarianModel.Plo +1 -0
  27. data/ext/libcharguess/cpp/.deps/LangHungarianModel.Po +78 -0
  28. data/ext/libcharguess/cpp/.deps/LangThaiModel.Plo +1 -0
  29. data/ext/libcharguess/cpp/.deps/LangThaiModel.Po +78 -0
  30. data/ext/libcharguess/cpp/.deps/Latin1Prober.Plo +1 -0
  31. data/ext/libcharguess/cpp/.deps/Latin1Prober.Po +78 -0
  32. data/ext/libcharguess/cpp/.deps/MBCSGroupProber.Plo +1 -0
  33. data/ext/libcharguess/cpp/.deps/MBCSGroupProber.Po +102 -0
  34. data/ext/libcharguess/cpp/.deps/MBCSSM.Plo +1 -0
  35. data/ext/libcharguess/cpp/.deps/MBCSSM.Po +77 -0
  36. data/ext/libcharguess/cpp/.deps/SBCSGroupProber.Plo +1 -0
  37. data/ext/libcharguess/cpp/.deps/SBCSGroupProber.Po +80 -0
  38. data/ext/libcharguess/cpp/.deps/SBCharsetProber.Plo +1 -0
  39. data/ext/libcharguess/cpp/.deps/SBCharsetProber.Po +78 -0
  40. data/ext/libcharguess/cpp/.deps/SJISProber.Plo +1 -0
  41. data/ext/libcharguess/cpp/.deps/SJISProber.Po +86 -0
  42. data/ext/libcharguess/cpp/.deps/UTF8Prober.Plo +1 -0
  43. data/ext/libcharguess/cpp/.deps/UTF8Prober.Po +82 -0
  44. data/ext/libcharguess/cpp/.deps/big5Prober.Plo +1 -0
  45. data/ext/libcharguess/cpp/.deps/big5Prober.Po +84 -0
  46. data/ext/libcharguess/cpp/.deps/charDistribution.Plo +1 -0
  47. data/ext/libcharguess/cpp/.deps/charDistribution.Po +87 -0
  48. data/ext/libcharguess/cpp/.deps/chardet.Plo +1 -0
  49. data/ext/libcharguess/cpp/.deps/chardet.Po +84 -0
  50. data/ext/libcharguess/cpp/.deps/charguess.Po +77 -0
  51. data/ext/libcharguess/cpp/.deps/jpCntx.Plo +1 -0
  52. data/ext/libcharguess/cpp/.deps/jpCntx.Po +75 -0
  53. data/ext/libcharguess/cpp/.deps/universal.Plo +1 -0
  54. data/ext/libcharguess/cpp/.deps/universal.Po +111 -0
  55. data/ext/libcharguess/cpp/AUTHORS +3 -0
  56. data/ext/libcharguess/cpp/Big5Freq.tab +928 -0
  57. data/ext/libcharguess/cpp/COPYING +340 -0
  58. data/ext/libcharguess/cpp/COPYRIGHT +20 -0
  59. data/ext/libcharguess/cpp/ChangeLog +0 -0
  60. data/ext/libcharguess/cpp/EUCJPProber.cpp +80 -0
  61. data/ext/libcharguess/cpp/EUCJPProber.h +58 -0
  62. data/ext/libcharguess/cpp/EUCKRFreq.tab +615 -0
  63. data/ext/libcharguess/cpp/EUCKRProber.cpp +80 -0
  64. data/ext/libcharguess/cpp/EUCKRProber.h +54 -0
  65. data/ext/libcharguess/cpp/EUCTWFreq.tab +448 -0
  66. data/ext/libcharguess/cpp/EUCTWProber.cpp +79 -0
  67. data/ext/libcharguess/cpp/EUCTWProber.h +53 -0
  68. data/ext/libcharguess/cpp/EscCharsetProber.cpp +89 -0
  69. data/ext/libcharguess/cpp/EscCharsetProber.h +49 -0
  70. data/ext/libcharguess/cpp/EscSM.cpp +244 -0
  71. data/ext/libcharguess/cpp/GB2312Freq.tab +476 -0
  72. data/ext/libcharguess/cpp/GB2312Prober.cpp +84 -0
  73. data/ext/libcharguess/cpp/GB2312Prober.h +56 -0
  74. data/ext/libcharguess/cpp/INSTALL +229 -0
  75. data/ext/libcharguess/cpp/JISFreq.tab +574 -0
  76. data/ext/libcharguess/cpp/LICENSE +504 -0
  77. data/ext/libcharguess/cpp/LangBulgarianModel.cpp +230 -0
  78. data/ext/libcharguess/cpp/LangCyrillicModel.cpp +340 -0
  79. data/ext/libcharguess/cpp/LangGreekModel.cpp +229 -0
  80. data/ext/libcharguess/cpp/LangHungarianModel.cpp +228 -0
  81. data/ext/libcharguess/cpp/LangThaiModel.cpp +206 -0
  82. data/ext/libcharguess/cpp/Latin1Prober.cpp +190 -0
  83. data/ext/libcharguess/cpp/Latin1Prober.h +49 -0
  84. data/ext/libcharguess/cpp/MBCSGroupProber.cpp +186 -0
  85. data/ext/libcharguess/cpp/MBCSGroupProber.h +58 -0
  86. data/ext/libcharguess/cpp/MBCSSM.cpp +610 -0
  87. data/ext/libcharguess/cpp/Makefile.am +45 -0
  88. data/ext/libcharguess/cpp/Makefile.in +608 -0
  89. data/ext/libcharguess/cpp/NEWS +0 -0
  90. data/ext/libcharguess/cpp/README +0 -0
  91. data/ext/libcharguess/cpp/SBCSGroupProber.cpp +244 -0
  92. data/ext/libcharguess/cpp/SBCSGroupProber.h +54 -0
  93. data/ext/libcharguess/cpp/SBCharsetProber.cpp +100 -0
  94. data/ext/libcharguess/cpp/SBCharsetProber.h +89 -0
  95. data/ext/libcharguess/cpp/SJISProber.cpp +86 -0
  96. data/ext/libcharguess/cpp/SJISProber.h +60 -0
  97. data/ext/libcharguess/cpp/UTF8Prober.cpp +75 -0
  98. data/ext/libcharguess/cpp/UTF8Prober.h +46 -0
  99. data/ext/libcharguess/cpp/aclocal.m4 +1008 -0
  100. data/ext/libcharguess/cpp/autogen.sh +153 -0
  101. data/ext/libcharguess/cpp/big5Prober.cpp +76 -0
  102. data/ext/libcharguess/cpp/big5Prober.h +53 -0
  103. data/ext/libcharguess/cpp/charDistribution.cpp +90 -0
  104. data/ext/libcharguess/cpp/charDistribution.h +219 -0
  105. data/ext/libcharguess/cpp/charguess.cpp +56 -0
  106. data/ext/libcharguess/cpp/charguess.h +23 -0
  107. data/ext/libcharguess/cpp/charsetProber.h +50 -0
  108. data/ext/libcharguess/cpp/codingStateMachine.h +92 -0
  109. data/ext/libcharguess/cpp/config.h +36 -0
  110. data/ext/libcharguess/cpp/config.h.in +35 -0
  111. data/ext/libcharguess/cpp/config.status +1075 -0
  112. data/ext/libcharguess/cpp/configure +5226 -0
  113. data/ext/libcharguess/cpp/configure.in +49 -0
  114. data/ext/libcharguess/cpp/depcomp +472 -0
  115. data/ext/libcharguess/cpp/fix_copyright +32 -0
  116. data/ext/libcharguess/cpp/install-sh +294 -0
  117. data/ext/libcharguess/cpp/jpCntx.cpp +194 -0
  118. data/ext/libcharguess/cpp/jpCntx.h +100 -0
  119. data/ext/libcharguess/cpp/missing +336 -0
  120. data/ext/libcharguess/cpp/mkinstalldirs +111 -0
  121. data/ext/libcharguess/cpp/pkgInt.h +72 -0
  122. data/ext/libcharguess/cpp/stamp-h1 +1 -0
  123. data/ext/libcharguess/cpp/test/test.cpp +78 -0
  124. data/ext/libcharguess/cpp/types.h +41 -0
  125. data/ext/libcharguess/cpp/universal.cpp +273 -0
  126. data/ext/libcharguess/cpp/universal.h +65 -0
  127. data/script/console +9 -0
  128. data/script/destroy +14 -0
  129. data/script/generate +14 -0
  130. data/tasks/extconf/charguess.rake +47 -0
  131. data/tasks/extconf.rake +13 -0
  132. data/test/test_charguess.rb +7 -0
  133. data/test/test_charguess_extn.rb +10 -0
  134. data/test/test_helper.rb +3 -0
  135. metadata +219 -0
@@ -0,0 +1,79 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #include "EUCTWProber.h"
22
+
23
+ void nsEUCTWProber::Reset(void)
24
+ {
25
+ mCodingSM->Reset();
26
+ mState = eDetecting;
27
+ mDistributionAnalyser.Reset();
28
+ //mContextAnalyser.Reset();
29
+ }
30
+
31
+ nsProbingState nsEUCTWProber::HandleData(const char* aBuf, PRUint32 aLen)
32
+ {
33
+ nsSMState codingState;
34
+
35
+ for (PRUint32 i = 0; i < aLen; i++)
36
+ {
37
+ codingState = mCodingSM->NextState(aBuf[i]);
38
+ if (codingState == eError)
39
+ {
40
+ mState = eNotMe;
41
+ break;
42
+ }
43
+ if (codingState == eItsMe)
44
+ {
45
+ mState = eFoundIt;
46
+ break;
47
+ }
48
+ if (codingState == eStart)
49
+ {
50
+ PRUint32 charLen = mCodingSM->GetCurrentCharLen();
51
+
52
+ if (i == 0)
53
+ {
54
+ mLastChar[1] = aBuf[0];
55
+ mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
56
+ }
57
+ else
58
+ mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
59
+ }
60
+ }
61
+
62
+ mLastChar[0] = aBuf[aLen-1];
63
+
64
+ if (mState == eDetecting)
65
+ if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
66
+ mState = eFoundIt;
67
+ // else
68
+ // mDistributionAnalyser.HandleData(aBuf, aLen);
69
+
70
+ return mState;
71
+ }
72
+
73
+ float nsEUCTWProber::GetConfidence(void)
74
+ {
75
+ float distribCf = mDistributionAnalyser.GetConfidence();
76
+
77
+ return (float)distribCf;
78
+ }
79
+
@@ -0,0 +1,53 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #ifndef nsEUCTWProber_h__
22
+ #define nsEUCTWProber_h__
23
+
24
+ #include "charsetProber.h"
25
+ #include "codingStateMachine.h"
26
+ #include "charDistribution.h"
27
+
28
+ class nsEUCTWProber: public nsCharSetProber {
29
+ public:
30
+ nsEUCTWProber(void){mCodingSM = new nsCodingStateMachine(&EUCTWSMModel);
31
+ Reset();};
32
+ virtual ~nsEUCTWProber(void){delete mCodingSM;};
33
+ nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
34
+ const char* GetCharSetName() {return "x-euc-tw";};
35
+ nsProbingState GetState(void) {return mState;};
36
+ void Reset(void);
37
+ float GetConfidence(void);
38
+ void SetOpion() {};
39
+
40
+ protected:
41
+ void GetDistribution(PRUint32 aCharLen, const char* aStr);
42
+
43
+ nsCodingStateMachine* mCodingSM;
44
+ nsProbingState mState;
45
+
46
+ //EUCTWContextAnalysis mContextAnalyser;
47
+ EUCTWDistributionAnalysis mDistributionAnalyser;
48
+ char mLastChar[2];
49
+
50
+ };
51
+
52
+
53
+ #endif /* nsEUCTWProber_h__ */
@@ -0,0 +1,89 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #include "EscCharsetProber.h"
22
+
23
+ nsEscCharSetProber::nsEscCharSetProber(void)
24
+ {
25
+ mCodingSM[0] = new nsCodingStateMachine(&HZSMModel);
26
+ mCodingSM[1] = new nsCodingStateMachine(&ISO2022CNSMModel);
27
+ mCodingSM[2] = new nsCodingStateMachine(&ISO2022JPSMModel);
28
+ mCodingSM[3] = new nsCodingStateMachine(&ISO2022KRSMModel);
29
+ mActiveSM = NUM_OF_ESC_CHARSETS;
30
+ mState = eDetecting;
31
+ mDetectedCharset = nsnull;
32
+ };
33
+
34
+ nsEscCharSetProber::~nsEscCharSetProber(void)
35
+ {
36
+ for (PRUint32 i = 0; i < NUM_OF_ESC_CHARSETS; i++)
37
+ delete mCodingSM[i];
38
+ }
39
+
40
+ void nsEscCharSetProber::Reset(void)
41
+ {
42
+ mState = eDetecting;
43
+ for (PRUint32 i = 0; i < NUM_OF_ESC_CHARSETS; i++)
44
+ mCodingSM[i]->Reset();
45
+ mActiveSM = NUM_OF_ESC_CHARSETS;
46
+ mDetectedCharset = nsnull;
47
+ }
48
+
49
+ nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, PRUint32 aLen)
50
+ {
51
+ nsSMState codingState;
52
+ PRInt32 j;
53
+ PRUint32 i;
54
+
55
+ for ( i = 0; i < aLen && mState == eDetecting; i++)
56
+ {
57
+ for (j = mActiveSM-1; j>= 0; j--)
58
+ {
59
+ //byte is feed to all active state machine
60
+ codingState = mCodingSM[j]->NextState(aBuf[i]);
61
+ if (codingState == eError)
62
+ {
63
+ //got negative answer for this state machine, make it inactive
64
+ mActiveSM--;
65
+ if (mActiveSM == 0)
66
+ {
67
+ mState = eNotMe;
68
+ return mState;
69
+ }
70
+ else if (j != (PRInt32)mActiveSM)
71
+ {
72
+ nsCodingStateMachine* t;
73
+ t = mCodingSM[mActiveSM];
74
+ mCodingSM[mActiveSM] = mCodingSM[j];
75
+ mCodingSM[j] = t;
76
+ }
77
+ }
78
+ else if (codingState == eItsMe)
79
+ {
80
+ mState = eFoundIt;
81
+ mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();
82
+ return mState;
83
+ }
84
+ }
85
+ }
86
+
87
+ return mState;
88
+ }
89
+
@@ -0,0 +1,49 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #ifndef nsEscCharSetProber_h__
22
+ #define nsEscCharSetProber_h__
23
+
24
+ #include "charsetProber.h"
25
+ #include "codingStateMachine.h"
26
+
27
+ #define NUM_OF_ESC_CHARSETS 4
28
+
29
+ class nsEscCharSetProber: public nsCharSetProber {
30
+ public:
31
+ nsEscCharSetProber(void);
32
+ virtual ~nsEscCharSetProber(void);
33
+ nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
34
+ const char* GetCharSetName() {return mDetectedCharset;};
35
+ nsProbingState GetState(void) {return mState;};
36
+ void Reset(void);
37
+ float GetConfidence(void){return (float)0.99;};
38
+ void SetOpion() {};
39
+
40
+ protected:
41
+ void GetDistribution(PRUint32 aCharLen, const char* aStr);
42
+
43
+ nsCodingStateMachine* mCodingSM[NUM_OF_ESC_CHARSETS] ;
44
+ PRUint32 mActiveSM;
45
+ nsProbingState mState;
46
+ const char * mDetectedCharset;
47
+ };
48
+
49
+ #endif /* nsEscCharSetProber_h__ */
@@ -0,0 +1,244 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #include "codingStateMachine.h"
22
+
23
+ static PRUint32 HZ_cls[ 256 / 8 ] = { // byte-value -> class table for HZ: 32 words, each PCK4BITS row lists the classes of 8 consecutive byte values
24
+ PCK4BITS(1,0,0,0,0,0,0,0), // 00 - 07: byte 0x00 is class 1
25
+ PCK4BITS(0,0,0,0,0,0,0,0), // 08 - 0f
26
+ PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
27
+ PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f: 0x1b (ESC) is class 1
28
+ PCK4BITS(0,0,0,0,0,0,0,0), // 20 - 27
29
+ PCK4BITS(0,0,0,0,0,0,0,0), // 28 - 2f
30
+ PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
31
+ PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
32
+ PCK4BITS(0,0,0,0,0,0,0,0), // 40 - 47
33
+ PCK4BITS(0,0,0,0,0,0,0,0), // 48 - 4f
34
+ PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
35
+ PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
36
+ PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
37
+ PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
38
+ PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
39
+ PCK4BITS(0,0,0,4,0,5,2,0), // 78 - 7f: 0x7b '{' is class 4, 0x7d '}' is class 5, 0x7e '~' is class 2
40
+ PCK4BITS(1,1,1,1,1,1,1,1), // 80 - 87: every byte from 0x80 up is class 1
41
+ PCK4BITS(1,1,1,1,1,1,1,1), // 88 - 8f
42
+ PCK4BITS(1,1,1,1,1,1,1,1), // 90 - 97
43
+ PCK4BITS(1,1,1,1,1,1,1,1), // 98 - 9f
44
+ PCK4BITS(1,1,1,1,1,1,1,1), // a0 - a7
45
+ PCK4BITS(1,1,1,1,1,1,1,1), // a8 - af
46
+ PCK4BITS(1,1,1,1,1,1,1,1), // b0 - b7
47
+ PCK4BITS(1,1,1,1,1,1,1,1), // b8 - bf
48
+ PCK4BITS(1,1,1,1,1,1,1,1), // c0 - c7
49
+ PCK4BITS(1,1,1,1,1,1,1,1), // c8 - cf
50
+ PCK4BITS(1,1,1,1,1,1,1,1), // d0 - d7
51
+ PCK4BITS(1,1,1,1,1,1,1,1), // d8 - df
52
+ PCK4BITS(1,1,1,1,1,1,1,1), // e0 - e7
53
+ PCK4BITS(1,1,1,1,1,1,1,1), // e8 - ef
54
+ PCK4BITS(1,1,1,1,1,1,1,1), // f0 - f7
55
+ PCK4BITS(1,1,1,1,1,1,1,1) // f8 - ff
56
+ };
57
+
58
+
59
+ static PRUint32 HZ_st [ 6] = { // HZ state-transition table: 6 words x 8 entries = 48 next-state values; presumably indexed by state * class factor + byte class - TODO confirm in codingStateMachine.h
60
+ PCK4BITS(eStart,eError, 3,eStart,eStart,eStart,eError,eError),//00-07 (entry indices, not byte values)
61
+ PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
62
+ PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError),//10-17
63
+ PCK4BITS( 5,eError, 6,eError, 5, 5, 4,eError),//18-1f
64
+ PCK4BITS( 4,eError, 4, 4, 4,eError, 4,eError),//20-27
65
+ PCK4BITS( 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
66
+ };
67
+
68
+ static const PRUint32 HZCharLenTable[] = {0, 0, 0, 0, 0, 0}; // six entries, all zero; presumably one character length per byte class - TODO confirm
69
+
70
+ SMModel HZSMModel = { // model handed to nsCodingStateMachine for HZ; field meanings below are inferred from the initialiser order - confirm against the SMModel declaration
71
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls }, // packed byte-class table (4-bit units)
72
+ 6, // presumably the class factor; equals the number of HZCharLenTable entries
73
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st }, // packed state-transition table (4-bit units)
74
+ HZCharLenTable, // character-length table defined above
75
+ "HZ-GB-2312", // charset name string for this model
76
+ };
77
+
78
+
79
+ static PRUint32 ISO2022CN_cls [ 256 / 8 ] = { // byte-value -> class table for ISO-2022-CN: 32 words, each PCK4BITS row lists the classes of 8 consecutive byte values
80
+ PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07: byte 0x00 is class 2
81
+ PCK4BITS(0,0,0,0,0,0,0,0), // 08 - 0f
82
+ PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
83
+ PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f: 0x1b (ESC) is class 1
84
+ PCK4BITS(0,0,0,0,0,0,0,0), // 20 - 27
85
+ PCK4BITS(0,3,0,0,0,0,0,0), // 28 - 2f: 0x29 ')' is class 3
86
+ PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
87
+ PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
88
+ PCK4BITS(0,0,0,4,0,0,0,0), // 40 - 47: 0x43 'C' is class 4
89
+ PCK4BITS(0,0,0,0,0,0,0,0), // 48 - 4f
90
+ PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
91
+ PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
92
+ PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
93
+ PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
94
+ PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
95
+ PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f
96
+ PCK4BITS(2,2,2,2,2,2,2,2), // 80 - 87: every byte from 0x80 up is class 2
97
+ PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f
98
+ PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97
99
+ PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f
100
+ PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
101
+ PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
102
+ PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
103
+ PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
104
+ PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
105
+ PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
106
+ PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
107
+ PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
108
+ PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
109
+ PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
110
+ PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
111
+ PCK4BITS(2,2,2,2,2,2,2,2) // f8 - ff
112
+ };
113
+
114
+
115
+ static PRUint32 ISO2022CN_st [ 8] = { // ISO-2022-CN state-transition table: 8 words x 8 entries = 64 next-state values; presumably indexed by state * class factor + byte class - TODO confirm in codingStateMachine.h
116
+ PCK4BITS(eStart, 3,eError,eStart,eStart,eStart,eStart,eStart),//00-07 (entry indices, not byte values)
117
+ PCK4BITS(eStart,eError,eError,eError,eError,eError,eError,eError),//08-0f
118
+ PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//10-17
119
+ PCK4BITS(eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError),//18-1f
120
+ PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//20-27
121
+ PCK4BITS( 5, 6,eError,eError,eError,eError,eError,eError),//28-2f
122
+ PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//30-37
123
+ PCK4BITS(eError,eError,eError,eError,eError,eItsMe,eError,eStart) //38-3f
124
+ };
125
+
126
+ static const PRUint32 ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0}; // nine entries, all zero; presumably one character length per byte class - TODO confirm
127
+
128
+ SMModel ISO2022CNSMModel = { // model handed to nsCodingStateMachine for ISO-2022-CN; field meanings below are inferred from the initialiser order - confirm against the SMModel declaration
129
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls }, // packed byte-class table (4-bit units)
130
+ 9, // presumably the class factor; equals the number of ISO2022CNCharLenTable entries
131
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st }, // packed state-transition table (4-bit units)
132
+ ISO2022CNCharLenTable, // character-length table defined above
133
+ "ISO-2022-CN", // charset name string for this model
134
+ };
135
+
136
+ static PRUint32 ISO2022JP_cls [ 256 / 8 ] = { // byte-value -> class table for ISO-2022-JP: 32 words, each PCK4BITS row lists the classes of 8 consecutive byte values
137
+ PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07: byte 0x00 is class 2
138
+ PCK4BITS(0,0,0,0,0,0,2,2), // 08 - 0f: 0x0e and 0x0f are class 2
139
+ PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
140
+ PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f: 0x1b (ESC) is class 1
141
+ PCK4BITS(0,0,0,0,7,0,0,0), // 20 - 27: 0x24 '$' is class 7
142
+ PCK4BITS(3,0,0,0,0,0,0,0), // 28 - 2f: 0x28 '(' is class 3
143
+ PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
144
+ PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
145
+ PCK4BITS(6,0,4,0,0,0,0,0), // 40 - 47: 0x40 '@' is class 6, 0x42 'B' is class 4
146
+ PCK4BITS(0,0,5,0,0,0,0,0), // 48 - 4f: 0x4a 'J' is class 5
147
+ PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
148
+ PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
149
+ PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
150
+ PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
151
+ PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
152
+ PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f
153
+ PCK4BITS(2,2,2,2,2,2,2,2), // 80 - 87: every byte from 0x80 up is class 2
154
+ PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f
155
+ PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97
156
+ PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f
157
+ PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
158
+ PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
159
+ PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
160
+ PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
161
+ PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
162
+ PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
163
+ PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
164
+ PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
165
+ PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
166
+ PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
167
+ PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
168
+ PCK4BITS(2,2,2,2,2,2,2,2) // f8 - ff
169
+ };
170
+
171
+
172
+ static PRUint32 ISO2022JP_st [ 6] = { // ISO-2022-JP state-transition table: 6 words x 8 entries = 48 next-state values; presumably indexed by state * class factor + byte class - TODO confirm in codingStateMachine.h
173
+ PCK4BITS(eStart, 3,eError,eStart,eStart,eStart,eStart,eStart),//00-07 (entry indices, not byte values)
174
+ PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//08-0f
175
+ PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//10-17
176
+ PCK4BITS(eError,eError,eError, 5,eError,eError,eError, 4),//18-1f
177
+ PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eItsMe,eError),//20-27
178
+ PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eError,eError) //28-2f
179
+ };
180
+
181
+ static const PRUint32 ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0}; // eight entries, all zero; presumably one character length per byte class - TODO confirm
182
+
183
+ SMModel ISO2022JPSMModel = { // model handed to nsCodingStateMachine for ISO-2022-JP; field meanings below are inferred from the initialiser order - confirm against the SMModel declaration
184
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls }, // packed byte-class table (4-bit units)
185
+ 8, // presumably the class factor; equals the number of ISO2022JPCharLenTable entries
186
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st }, // packed state-transition table (4-bit units)
187
+ ISO2022JPCharLenTable, // character-length table defined above
188
+ "ISO-2022-JP", // charset name string for this model
189
+ };
190
+
191
+ static PRUint32 ISO2022KR_cls [ 256 / 8 ] = { // byte-value -> class table for ISO-2022-KR: 32 words, each PCK4BITS row lists the classes of 8 consecutive byte values
192
+ PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07: byte 0x00 is class 2
193
+ PCK4BITS(0,0,0,0,0,0,0,0), // 08 - 0f
194
+ PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
195
+ PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f: 0x1b (ESC) is class 1
196
+ PCK4BITS(0,0,0,0,3,0,0,0), // 20 - 27: 0x24 '$' is class 3
197
+ PCK4BITS(0,4,0,0,0,0,0,0), // 28 - 2f: 0x29 ')' is class 4
198
+ PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
199
+ PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
200
+ PCK4BITS(0,0,0,5,0,0,0,0), // 40 - 47: 0x43 'C' is class 5
201
+ PCK4BITS(0,0,0,0,0,0,0,0), // 48 - 4f
202
+ PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
203
+ PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
204
+ PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
205
+ PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
206
+ PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
207
+ PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f
208
+ PCK4BITS(2,2,2,2,2,2,2,2), // 80 - 87: every byte from 0x80 up is class 2
209
+ PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f
210
+ PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97
211
+ PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f
212
+ PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
213
+ PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
214
+ PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
215
+ PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
216
+ PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
217
+ PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
218
+ PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
219
+ PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
220
+ PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
221
+ PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
222
+ PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
223
+ PCK4BITS(2,2,2,2,2,2,2,2) // f8 - ff
224
+ };
225
+
226
+
227
+ static PRUint32 ISO2022KR_st [ 5] = { // ISO-2022-KR state-transition table: 5 words x 8 entries = 40 next-state values; presumably indexed by state * class factor + byte class - TODO confirm in codingStateMachine.h
228
+ PCK4BITS(eStart, 3,eError,eStart,eStart,eStart,eError,eError),//00-07 (entry indices, not byte values)
229
+ PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
230
+ PCK4BITS(eItsMe,eItsMe,eError,eError,eError, 4,eError,eError),//10-17
231
+ PCK4BITS(eError,eError,eError,eError, 5,eError,eError,eError),//18-1f
232
+ PCK4BITS(eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart) //20-27
233
+ };
234
+
235
+ static const PRUint32 ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0}; // six entries, all zero; presumably one character length per byte class - TODO confirm
236
+
237
+ SMModel ISO2022KRSMModel = { // model handed to nsCodingStateMachine for ISO-2022-KR; field meanings below are inferred from the initialiser order - confirm against the SMModel declaration
238
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls }, // packed byte-class table (4-bit units)
239
+ 6, // presumably the class factor; equals the number of ISO2022KRCharLenTable entries
240
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st }, // packed state-transition table (4-bit units)
241
+ ISO2022KRCharLenTable, // character-length table defined above
242
+ "ISO-2022-KR", // charset name string for this model
243
+ };
244
+