charguess 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. data/History.txt +4 -0
  2. data/Manifest.txt +134 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +26 -0
  5. data/Rakefile +25 -0
  6. data/ext/charguess/charguess.c +29 -0
  7. data/ext/charguess/extconf.rb +11 -0
  8. data/ext/libcharguess/cpp/.deps/EUCJPProber.Plo +1 -0
  9. data/ext/libcharguess/cpp/.deps/EUCJPProber.Po +87 -0
  10. data/ext/libcharguess/cpp/.deps/EUCKRProber.Plo +1 -0
  11. data/ext/libcharguess/cpp/.deps/EUCKRProber.Po +85 -0
  12. data/ext/libcharguess/cpp/.deps/EUCTWProber.Plo +1 -0
  13. data/ext/libcharguess/cpp/.deps/EUCTWProber.Po +85 -0
  14. data/ext/libcharguess/cpp/.deps/EscCharsetProber.Plo +1 -0
  15. data/ext/libcharguess/cpp/.deps/EscCharsetProber.Po +83 -0
  16. data/ext/libcharguess/cpp/.deps/EscSM.Plo +1 -0
  17. data/ext/libcharguess/cpp/.deps/EscSM.Po +77 -0
  18. data/ext/libcharguess/cpp/.deps/GB2312Prober.Plo +1 -0
  19. data/ext/libcharguess/cpp/.deps/GB2312Prober.Po +85 -0
  20. data/ext/libcharguess/cpp/.deps/LangBulgarianModel.Plo +1 -0
  21. data/ext/libcharguess/cpp/.deps/LangBulgarianModel.Po +78 -0
  22. data/ext/libcharguess/cpp/.deps/LangCyrillicModel.Plo +1 -0
  23. data/ext/libcharguess/cpp/.deps/LangCyrillicModel.Po +78 -0
  24. data/ext/libcharguess/cpp/.deps/LangGreekModel.Plo +1 -0
  25. data/ext/libcharguess/cpp/.deps/LangGreekModel.Po +78 -0
  26. data/ext/libcharguess/cpp/.deps/LangHungarianModel.Plo +1 -0
  27. data/ext/libcharguess/cpp/.deps/LangHungarianModel.Po +78 -0
  28. data/ext/libcharguess/cpp/.deps/LangThaiModel.Plo +1 -0
  29. data/ext/libcharguess/cpp/.deps/LangThaiModel.Po +78 -0
  30. data/ext/libcharguess/cpp/.deps/Latin1Prober.Plo +1 -0
  31. data/ext/libcharguess/cpp/.deps/Latin1Prober.Po +78 -0
  32. data/ext/libcharguess/cpp/.deps/MBCSGroupProber.Plo +1 -0
  33. data/ext/libcharguess/cpp/.deps/MBCSGroupProber.Po +102 -0
  34. data/ext/libcharguess/cpp/.deps/MBCSSM.Plo +1 -0
  35. data/ext/libcharguess/cpp/.deps/MBCSSM.Po +77 -0
  36. data/ext/libcharguess/cpp/.deps/SBCSGroupProber.Plo +1 -0
  37. data/ext/libcharguess/cpp/.deps/SBCSGroupProber.Po +80 -0
  38. data/ext/libcharguess/cpp/.deps/SBCharsetProber.Plo +1 -0
  39. data/ext/libcharguess/cpp/.deps/SBCharsetProber.Po +78 -0
  40. data/ext/libcharguess/cpp/.deps/SJISProber.Plo +1 -0
  41. data/ext/libcharguess/cpp/.deps/SJISProber.Po +86 -0
  42. data/ext/libcharguess/cpp/.deps/UTF8Prober.Plo +1 -0
  43. data/ext/libcharguess/cpp/.deps/UTF8Prober.Po +82 -0
  44. data/ext/libcharguess/cpp/.deps/big5Prober.Plo +1 -0
  45. data/ext/libcharguess/cpp/.deps/big5Prober.Po +84 -0
  46. data/ext/libcharguess/cpp/.deps/charDistribution.Plo +1 -0
  47. data/ext/libcharguess/cpp/.deps/charDistribution.Po +87 -0
  48. data/ext/libcharguess/cpp/.deps/chardet.Plo +1 -0
  49. data/ext/libcharguess/cpp/.deps/chardet.Po +84 -0
  50. data/ext/libcharguess/cpp/.deps/charguess.Po +77 -0
  51. data/ext/libcharguess/cpp/.deps/jpCntx.Plo +1 -0
  52. data/ext/libcharguess/cpp/.deps/jpCntx.Po +75 -0
  53. data/ext/libcharguess/cpp/.deps/universal.Plo +1 -0
  54. data/ext/libcharguess/cpp/.deps/universal.Po +111 -0
  55. data/ext/libcharguess/cpp/AUTHORS +3 -0
  56. data/ext/libcharguess/cpp/Big5Freq.tab +928 -0
  57. data/ext/libcharguess/cpp/COPYING +340 -0
  58. data/ext/libcharguess/cpp/COPYRIGHT +20 -0
  59. data/ext/libcharguess/cpp/ChangeLog +0 -0
  60. data/ext/libcharguess/cpp/EUCJPProber.cpp +80 -0
  61. data/ext/libcharguess/cpp/EUCJPProber.h +58 -0
  62. data/ext/libcharguess/cpp/EUCKRFreq.tab +615 -0
  63. data/ext/libcharguess/cpp/EUCKRProber.cpp +80 -0
  64. data/ext/libcharguess/cpp/EUCKRProber.h +54 -0
  65. data/ext/libcharguess/cpp/EUCTWFreq.tab +448 -0
  66. data/ext/libcharguess/cpp/EUCTWProber.cpp +79 -0
  67. data/ext/libcharguess/cpp/EUCTWProber.h +53 -0
  68. data/ext/libcharguess/cpp/EscCharsetProber.cpp +89 -0
  69. data/ext/libcharguess/cpp/EscCharsetProber.h +49 -0
  70. data/ext/libcharguess/cpp/EscSM.cpp +244 -0
  71. data/ext/libcharguess/cpp/GB2312Freq.tab +476 -0
  72. data/ext/libcharguess/cpp/GB2312Prober.cpp +84 -0
  73. data/ext/libcharguess/cpp/GB2312Prober.h +56 -0
  74. data/ext/libcharguess/cpp/INSTALL +229 -0
  75. data/ext/libcharguess/cpp/JISFreq.tab +574 -0
  76. data/ext/libcharguess/cpp/LICENSE +504 -0
  77. data/ext/libcharguess/cpp/LangBulgarianModel.cpp +230 -0
  78. data/ext/libcharguess/cpp/LangCyrillicModel.cpp +340 -0
  79. data/ext/libcharguess/cpp/LangGreekModel.cpp +229 -0
  80. data/ext/libcharguess/cpp/LangHungarianModel.cpp +228 -0
  81. data/ext/libcharguess/cpp/LangThaiModel.cpp +206 -0
  82. data/ext/libcharguess/cpp/Latin1Prober.cpp +190 -0
  83. data/ext/libcharguess/cpp/Latin1Prober.h +49 -0
  84. data/ext/libcharguess/cpp/MBCSGroupProber.cpp +186 -0
  85. data/ext/libcharguess/cpp/MBCSGroupProber.h +58 -0
  86. data/ext/libcharguess/cpp/MBCSSM.cpp +610 -0
  87. data/ext/libcharguess/cpp/Makefile.am +45 -0
  88. data/ext/libcharguess/cpp/Makefile.in +608 -0
  89. data/ext/libcharguess/cpp/NEWS +0 -0
  90. data/ext/libcharguess/cpp/README +0 -0
  91. data/ext/libcharguess/cpp/SBCSGroupProber.cpp +244 -0
  92. data/ext/libcharguess/cpp/SBCSGroupProber.h +54 -0
  93. data/ext/libcharguess/cpp/SBCharsetProber.cpp +100 -0
  94. data/ext/libcharguess/cpp/SBCharsetProber.h +89 -0
  95. data/ext/libcharguess/cpp/SJISProber.cpp +86 -0
  96. data/ext/libcharguess/cpp/SJISProber.h +60 -0
  97. data/ext/libcharguess/cpp/UTF8Prober.cpp +75 -0
  98. data/ext/libcharguess/cpp/UTF8Prober.h +46 -0
  99. data/ext/libcharguess/cpp/aclocal.m4 +1008 -0
  100. data/ext/libcharguess/cpp/autogen.sh +153 -0
  101. data/ext/libcharguess/cpp/big5Prober.cpp +76 -0
  102. data/ext/libcharguess/cpp/big5Prober.h +53 -0
  103. data/ext/libcharguess/cpp/charDistribution.cpp +90 -0
  104. data/ext/libcharguess/cpp/charDistribution.h +219 -0
  105. data/ext/libcharguess/cpp/charguess.cpp +56 -0
  106. data/ext/libcharguess/cpp/charguess.h +23 -0
  107. data/ext/libcharguess/cpp/charsetProber.h +50 -0
  108. data/ext/libcharguess/cpp/codingStateMachine.h +92 -0
  109. data/ext/libcharguess/cpp/config.h +36 -0
  110. data/ext/libcharguess/cpp/config.h.in +35 -0
  111. data/ext/libcharguess/cpp/config.status +1075 -0
  112. data/ext/libcharguess/cpp/configure +5226 -0
  113. data/ext/libcharguess/cpp/configure.in +49 -0
  114. data/ext/libcharguess/cpp/depcomp +472 -0
  115. data/ext/libcharguess/cpp/fix_copyright +32 -0
  116. data/ext/libcharguess/cpp/install-sh +294 -0
  117. data/ext/libcharguess/cpp/jpCntx.cpp +194 -0
  118. data/ext/libcharguess/cpp/jpCntx.h +100 -0
  119. data/ext/libcharguess/cpp/missing +336 -0
  120. data/ext/libcharguess/cpp/mkinstalldirs +111 -0
  121. data/ext/libcharguess/cpp/pkgInt.h +72 -0
  122. data/ext/libcharguess/cpp/stamp-h1 +1 -0
  123. data/ext/libcharguess/cpp/test/test.cpp +78 -0
  124. data/ext/libcharguess/cpp/types.h +41 -0
  125. data/ext/libcharguess/cpp/universal.cpp +273 -0
  126. data/ext/libcharguess/cpp/universal.h +65 -0
  127. data/script/console +9 -0
  128. data/script/destroy +14 -0
  129. data/script/generate +14 -0
  130. data/tasks/extconf/charguess.rake +47 -0
  131. data/tasks/extconf.rake +13 -0
  132. data/test/test_charguess.rb +7 -0
  133. data/test/test_charguess_extn.rb +10 -0
  134. data/test/test_helper.rb +3 -0
  135. metadata +219 -0
@@ -0,0 +1,79 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #include "EUCTWProber.h"
22
+
23
+ void nsEUCTWProber::Reset(void)
24
+ {
25
+ mCodingSM->Reset();
26
+ mState = eDetecting;
27
+ mDistributionAnalyser.Reset();
28
+ //mContextAnalyser.Reset();
29
+ }
30
+
31
+ nsProbingState nsEUCTWProber::HandleData(const char* aBuf, PRUint32 aLen)
32
+ {
33
+ nsSMState codingState;
34
+
35
+ for (PRUint32 i = 0; i < aLen; i++)
36
+ {
37
+ codingState = mCodingSM->NextState(aBuf[i]);
38
+ if (codingState == eError)
39
+ {
40
+ mState = eNotMe;
41
+ break;
42
+ }
43
+ if (codingState == eItsMe)
44
+ {
45
+ mState = eFoundIt;
46
+ break;
47
+ }
48
+ if (codingState == eStart)
49
+ {
50
+ PRUint32 charLen = mCodingSM->GetCurrentCharLen();
51
+
52
+ if (i == 0)
53
+ {
54
+ mLastChar[1] = aBuf[0];
55
+ mDistributionAnalyser.HandleOneChar(mLastChar, charLen);
56
+ }
57
+ else
58
+ mDistributionAnalyser.HandleOneChar(aBuf+i-1, charLen);
59
+ }
60
+ }
61
+
62
+ mLastChar[0] = aBuf[aLen-1];
63
+
64
+ if (mState == eDetecting)
65
+ if (mDistributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
66
+ mState = eFoundIt;
67
+ // else
68
+ // mDistributionAnalyser.HandleData(aBuf, aLen);
69
+
70
+ return mState;
71
+ }
72
+
73
+ float nsEUCTWProber::GetConfidence(void)
74
+ {
75
+ float distribCf = mDistributionAnalyser.GetConfidence();
76
+
77
+ return (float)distribCf;
78
+ }
79
+
@@ -0,0 +1,53 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #ifndef nsEUCTWProber_h__
22
+ #define nsEUCTWProber_h__
23
+
24
+ #include "charsetProber.h"
25
+ #include "codingStateMachine.h"
26
+ #include "charDistribution.h"
27
+
28
+ class nsEUCTWProber: public nsCharSetProber {
29
+ public:
30
+ nsEUCTWProber(void){mCodingSM = new nsCodingStateMachine(&EUCTWSMModel);
31
+ Reset();};
32
+ virtual ~nsEUCTWProber(void){delete mCodingSM;};
33
+ nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
34
+ const char* GetCharSetName() {return "x-euc-tw";};
35
+ nsProbingState GetState(void) {return mState;};
36
+ void Reset(void);
37
+ float GetConfidence(void);
38
+ void SetOpion() {};
39
+
40
+ protected:
41
+ void GetDistribution(PRUint32 aCharLen, const char* aStr);
42
+
43
+ nsCodingStateMachine* mCodingSM;
44
+ nsProbingState mState;
45
+
46
+ //EUCTWContextAnalysis mContextAnalyser;
47
+ EUCTWDistributionAnalysis mDistributionAnalyser;
48
+ char mLastChar[2];
49
+
50
+ };
51
+
52
+
53
+ #endif /* nsEUCTWProber_h__ */
@@ -0,0 +1,89 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #include "EscCharsetProber.h"
22
+
23
+ nsEscCharSetProber::nsEscCharSetProber(void)
24
+ {
25
+ mCodingSM[0] = new nsCodingStateMachine(&HZSMModel);
26
+ mCodingSM[1] = new nsCodingStateMachine(&ISO2022CNSMModel);
27
+ mCodingSM[2] = new nsCodingStateMachine(&ISO2022JPSMModel);
28
+ mCodingSM[3] = new nsCodingStateMachine(&ISO2022KRSMModel);
29
+ mActiveSM = NUM_OF_ESC_CHARSETS;
30
+ mState = eDetecting;
31
+ mDetectedCharset = nsnull;
32
+ };
33
+
34
+ nsEscCharSetProber::~nsEscCharSetProber(void)
35
+ {
36
+ for (PRUint32 i = 0; i < NUM_OF_ESC_CHARSETS; i++)
37
+ delete mCodingSM[i];
38
+ }
39
+
40
+ void nsEscCharSetProber::Reset(void)
41
+ {
42
+ mState = eDetecting;
43
+ for (PRUint32 i = 0; i < NUM_OF_ESC_CHARSETS; i++)
44
+ mCodingSM[i]->Reset();
45
+ mActiveSM = NUM_OF_ESC_CHARSETS;
46
+ mDetectedCharset = nsnull;
47
+ }
48
+
49
+ nsProbingState nsEscCharSetProber::HandleData(const char* aBuf, PRUint32 aLen)
50
+ {
51
+ nsSMState codingState;
52
+ PRInt32 j;
53
+ PRUint32 i;
54
+
55
+ for ( i = 0; i < aLen && mState == eDetecting; i++)
56
+ {
57
+ for (j = mActiveSM-1; j>= 0; j--)
58
+ {
59
+ //byte is feed to all active state machine
60
+ codingState = mCodingSM[j]->NextState(aBuf[i]);
61
+ if (codingState == eError)
62
+ {
63
+ //got negative answer for this state machine, make it inactive
64
+ mActiveSM--;
65
+ if (mActiveSM == 0)
66
+ {
67
+ mState = eNotMe;
68
+ return mState;
69
+ }
70
+ else if (j != (PRInt32)mActiveSM)
71
+ {
72
+ nsCodingStateMachine* t;
73
+ t = mCodingSM[mActiveSM];
74
+ mCodingSM[mActiveSM] = mCodingSM[j];
75
+ mCodingSM[j] = t;
76
+ }
77
+ }
78
+ else if (codingState == eItsMe)
79
+ {
80
+ mState = eFoundIt;
81
+ mDetectedCharset = mCodingSM[j]->GetCodingStateMachine();
82
+ return mState;
83
+ }
84
+ }
85
+ }
86
+
87
+ return mState;
88
+ }
89
+
@@ -0,0 +1,49 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #ifndef nsEscCharSetProber_h__
22
+ #define nsEscCharSetProber_h__
23
+
24
+ #include "charsetProber.h"
25
+ #include "codingStateMachine.h"
26
+
27
+ #define NUM_OF_ESC_CHARSETS 4
28
+
29
+ class nsEscCharSetProber: public nsCharSetProber {
30
+ public:
31
+ nsEscCharSetProber(void);
32
+ virtual ~nsEscCharSetProber(void);
33
+ nsProbingState HandleData(const char* aBuf, PRUint32 aLen);
34
+ const char* GetCharSetName() {return mDetectedCharset;};
35
+ nsProbingState GetState(void) {return mState;};
36
+ void Reset(void);
37
+ float GetConfidence(void){return (float)0.99;};
38
+ void SetOpion() {};
39
+
40
+ protected:
41
+ void GetDistribution(PRUint32 aCharLen, const char* aStr);
42
+
43
+ nsCodingStateMachine* mCodingSM[NUM_OF_ESC_CHARSETS] ;
44
+ PRUint32 mActiveSM;
45
+ nsProbingState mState;
46
+ const char * mDetectedCharset;
47
+ };
48
+
49
+ #endif /* nsEscCharSetProber_h__ */
@@ -0,0 +1,244 @@
1
+ /*
2
+ libcharguess - Guess the encoding/charset of a string
3
+ Copyright (C) 2003 Stephane Corbe <noubi@users.sourceforge.net>
4
+ Based on Mozilla sources
5
+
6
+ This library is free software; you can redistribute it and/or
7
+ modify it under the terms of the GNU Lesser General Public
8
+ License as published by the Free Software Foundation; either
9
+ version 2.1 of the License, or (at your option) any later version.
10
+
11
+ This library is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public
17
+ License along with this library; if not, write to the Free Software
18
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ */
20
+
21
+ #include "codingStateMachine.h"
22
+
23
// Byte-class table of the HZ state machine: 256/8 = 32 words, one PCK4BITS
// row per eight consecutive byte values (the trailing comment of each row
// names the byte range it covers).
// NOTE(review): PCK4BITS is defined outside this file; it presumably packs
// eight 4-bit values into one PRUint32, first argument = lowest byte - confirm.
// Read by row position: 0x00, 0x1b (ESC) and 0x80-0xff are class 1,
// 0x7b '{' is class 4, 0x7d '}' is class 5, 0x7e '~' is class 2, all other
// bytes are class 0.
+ static PRUint32 HZ_cls[ 256 / 8 ] = {
24
+ PCK4BITS(1,0,0,0,0,0,0,0), // 00 - 07
25
+ PCK4BITS(0,0,0,0,0,0,0,0), // 08 - 0f
26
+ PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
27
+ PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f
28
+ PCK4BITS(0,0,0,0,0,0,0,0), // 20 - 27
29
+ PCK4BITS(0,0,0,0,0,0,0,0), // 28 - 2f
30
+ PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
31
+ PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
32
+ PCK4BITS(0,0,0,0,0,0,0,0), // 40 - 47
33
+ PCK4BITS(0,0,0,0,0,0,0,0), // 48 - 4f
34
+ PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
35
+ PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
36
+ PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
37
+ PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
38
+ PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
39
+ PCK4BITS(0,0,0,4,0,5,2,0), // 78 - 7f
40
+ PCK4BITS(1,1,1,1,1,1,1,1), // 80 - 87
41
+ PCK4BITS(1,1,1,1,1,1,1,1), // 88 - 8f
42
+ PCK4BITS(1,1,1,1,1,1,1,1), // 90 - 97
43
+ PCK4BITS(1,1,1,1,1,1,1,1), // 98 - 9f
44
+ PCK4BITS(1,1,1,1,1,1,1,1), // a0 - a7
45
+ PCK4BITS(1,1,1,1,1,1,1,1), // a8 - af
46
+ PCK4BITS(1,1,1,1,1,1,1,1), // b0 - b7
47
+ PCK4BITS(1,1,1,1,1,1,1,1), // b8 - bf
48
+ PCK4BITS(1,1,1,1,1,1,1,1), // c0 - c7
49
+ PCK4BITS(1,1,1,1,1,1,1,1), // c8 - cf
50
+ PCK4BITS(1,1,1,1,1,1,1,1), // d0 - d7
51
+ PCK4BITS(1,1,1,1,1,1,1,1), // d8 - df
52
+ PCK4BITS(1,1,1,1,1,1,1,1), // e0 - e7
53
+ PCK4BITS(1,1,1,1,1,1,1,1), // e8 - ef
54
+ PCK4BITS(1,1,1,1,1,1,1,1), // f0 - f7
55
+ PCK4BITS(1,1,1,1,1,1,1,1) // f8 - ff
56
+ };
57
+
58
+
59
// State-transition table of the HZ state machine: 6 words of eight packed
// entries (48 entries); the trailing comment of each row is the hexadecimal
// index range of its entries. An entry is eStart, eError, eItsMe or the
// number of an intermediate state.
// NOTE(review): presumably indexed as state * classFactor + byteClass, with
// the class factor (6) taken from HZSMModel - confirm in codingStateMachine.h.
+ static PRUint32 HZ_st [ 6] = {
60
+ PCK4BITS(eStart,eError, 3,eStart,eStart,eStart,eError,eError),//00-07
61
+ PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
62
+ PCK4BITS(eItsMe,eItsMe,eError,eError,eStart,eStart, 4,eError),//10-17
63
+ PCK4BITS( 5,eError, 6,eError, 5, 5, 4,eError),//18-1f
64
+ PCK4BITS( 4,eError, 4, 4, 4,eError, 4,eError),//20-27
65
+ PCK4BITS( 4,eItsMe,eStart,eStart,eStart,eStart,eStart,eStart) //28-2f
66
+ };
67
+
68
// Character-length table for HZ: six entries, all zero.
+ static const PRUint32 HZCharLenTable[] = {0, 0, 0, 0, 0, 0};
69
+
70
// Model describing the HZ state machine. The initialiser supplies, in order:
// the packed class table (HZ_cls), the value 6, the packed state table
// (HZ_st), the character-length table and the charset name "HZ-GB-2312".
// NOTE(review): the SMModel field names are declared elsewhere; the 6 is
// presumably the class factor (number of byte classes) - confirm.
+ SMModel HZSMModel = {
71
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_cls },
72
+ 6,
73
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, HZ_st },
74
+ HZCharLenTable,
75
+ "HZ-GB-2312",
76
+ };
77
+
78
+
79
// Byte-class table of the ISO-2022-CN state machine: 256/8 = 32 words, one
// PCK4BITS row per eight consecutive byte values (see the range comment on
// each row).
// NOTE(review): assumes PCK4BITS packs its arguments lowest byte first - confirm.
// Read by row position: 0x00 and 0x80-0xff are class 2, 0x1b (ESC) is
// class 1, 0x29 ')' is class 3, 0x43 'C' is class 4, all other bytes class 0.
+ static PRUint32 ISO2022CN_cls [ 256 / 8 ] = {
80
+ PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
81
+ PCK4BITS(0,0,0,0,0,0,0,0), // 08 - 0f
82
+ PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
83
+ PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f
84
+ PCK4BITS(0,0,0,0,0,0,0,0), // 20 - 27
85
+ PCK4BITS(0,3,0,0,0,0,0,0), // 28 - 2f
86
+ PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
87
+ PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
88
+ PCK4BITS(0,0,0,4,0,0,0,0), // 40 - 47
89
+ PCK4BITS(0,0,0,0,0,0,0,0), // 48 - 4f
90
+ PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
91
+ PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
92
+ PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
93
+ PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
94
+ PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
95
+ PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f
96
+ PCK4BITS(2,2,2,2,2,2,2,2), // 80 - 87
97
+ PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f
98
+ PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97
99
+ PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f
100
+ PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
101
+ PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
102
+ PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
103
+ PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
104
+ PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
105
+ PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
106
+ PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
107
+ PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
108
+ PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
109
+ PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
110
+ PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
111
+ PCK4BITS(2,2,2,2,2,2,2,2) // f8 - ff
112
+ };
113
+
114
+
115
// State-transition table of the ISO-2022-CN state machine: 8 words of eight
// packed entries (64 entries); the trailing comment of each row is the
// hexadecimal index range of its entries. An entry is eStart, eError, eItsMe
// or the number of an intermediate state.
// NOTE(review): ISO2022CNSMModel passes 9 next to this table while the table
// holds 64 entries, which is not a multiple of 9 - verify how the state
// machine indexes it before changing either value.
+ static PRUint32 ISO2022CN_st [ 8] = {
116
+ PCK4BITS(eStart, 3,eError,eStart,eStart,eStart,eStart,eStart),//00-07
117
+ PCK4BITS(eStart,eError,eError,eError,eError,eError,eError,eError),//08-0f
118
+ PCK4BITS(eError,eError,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//10-17
119
+ PCK4BITS(eItsMe,eItsMe,eItsMe,eError,eError,eError, 4,eError),//18-1f
120
+ PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//20-27
121
+ PCK4BITS( 5, 6,eError,eError,eError,eError,eError,eError),//28-2f
122
+ PCK4BITS(eError,eError,eError,eItsMe,eError,eError,eError,eError),//30-37
123
+ PCK4BITS(eError,eError,eError,eError,eError,eItsMe,eError,eStart) //38-3f
124
+ };
125
+
126
// Character-length table for ISO-2022-CN: nine entries, all zero.
+ static const PRUint32 ISO2022CNCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
127
+
128
// Model describing the ISO-2022-CN state machine. The initialiser supplies,
// in order: the packed class table (ISO2022CN_cls), the value 9, the packed
// state table (ISO2022CN_st), the character-length table and the charset
// name "ISO-2022-CN".
// NOTE(review): the SMModel field names are declared elsewhere; the 9 is
// presumably the class factor - confirm.
+ SMModel ISO2022CNSMModel = {
129
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_cls },
130
+ 9,
131
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022CN_st },
132
+ ISO2022CNCharLenTable,
133
+ "ISO-2022-CN",
134
+ };
135
+
136
// Byte-class table of the ISO-2022-JP state machine: 256/8 = 32 words, one
// PCK4BITS row per eight consecutive byte values (see the range comment on
// each row).
// NOTE(review): assumes PCK4BITS packs its arguments lowest byte first - confirm.
// Read by row position: 0x00, 0x0e, 0x0f and 0x80-0xff are class 2,
// 0x1b (ESC) is class 1, 0x24 '$' is class 7, 0x28 '(' is class 3,
// 0x40 '@' is class 6, 0x42 'B' is class 4, 0x4a 'J' is class 5,
// all other bytes are class 0.
+ static PRUint32 ISO2022JP_cls [ 256 / 8 ] = {
137
+ PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
138
+ PCK4BITS(0,0,0,0,0,0,2,2), // 08 - 0f
139
+ PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
140
+ PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f
141
+ PCK4BITS(0,0,0,0,7,0,0,0), // 20 - 27
142
+ PCK4BITS(3,0,0,0,0,0,0,0), // 28 - 2f
143
+ PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
144
+ PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
145
+ PCK4BITS(6,0,4,0,0,0,0,0), // 40 - 47
146
+ PCK4BITS(0,0,5,0,0,0,0,0), // 48 - 4f
147
+ PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
148
+ PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
149
+ PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
150
+ PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
151
+ PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
152
+ PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f
153
+ PCK4BITS(2,2,2,2,2,2,2,2), // 80 - 87
154
+ PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f
155
+ PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97
156
+ PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f
157
+ PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
158
+ PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
159
+ PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
160
+ PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
161
+ PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
162
+ PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
163
+ PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
164
+ PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
165
+ PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
166
+ PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
167
+ PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
168
+ PCK4BITS(2,2,2,2,2,2,2,2) // f8 - ff
169
+ };
170
+
171
+
172
// State-transition table of the ISO-2022-JP state machine: 6 words of eight
// packed entries (48 entries); the trailing comment of each row is the
// hexadecimal index range of its entries. An entry is eStart, eError, eItsMe
// or the number of an intermediate state.
// NOTE(review): presumably indexed as state * classFactor + byteClass, with
// the class factor (8) taken from ISO2022JPSMModel - confirm.
+ static PRUint32 ISO2022JP_st [ 6] = {
173
+ PCK4BITS(eStart, 3,eError,eStart,eStart,eStart,eStart,eStart),//00-07
174
+ PCK4BITS(eError,eError,eError,eError,eError,eError,eError,eError),//08-0f
175
+ PCK4BITS(eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe,eItsMe),//10-17
176
+ PCK4BITS(eError,eError,eError, 5,eError,eError,eError, 4),//18-1f
177
+ PCK4BITS(eError,eError,eError,eError,eItsMe,eError,eItsMe,eError),//20-27
178
+ PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eError,eError) //28-2f
179
+ };
180
+
181
// Character-length table for ISO-2022-JP: eight entries, all zero.
+ static const PRUint32 ISO2022JPCharLenTable[] = {0, 0, 0, 0, 0, 0, 0, 0};
182
+
183
// Model describing the ISO-2022-JP state machine. The initialiser supplies,
// in order: the packed class table (ISO2022JP_cls), the value 8, the packed
// state table (ISO2022JP_st), the character-length table and the charset
// name "ISO-2022-JP".
// NOTE(review): the SMModel field names are declared elsewhere; the 8 is
// presumably the class factor - confirm.
+ SMModel ISO2022JPSMModel = {
184
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_cls },
185
+ 8,
186
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022JP_st },
187
+ ISO2022JPCharLenTable,
188
+ "ISO-2022-JP",
189
+ };
190
+
191
// Byte-class table of the ISO-2022-KR state machine: 256/8 = 32 words, one
// PCK4BITS row per eight consecutive byte values (see the range comment on
// each row).
// NOTE(review): assumes PCK4BITS packs its arguments lowest byte first - confirm.
// Read by row position: 0x00 and 0x80-0xff are class 2, 0x1b (ESC) is
// class 1, 0x24 '$' is class 3, 0x29 ')' is class 4, 0x43 'C' is class 5,
// all other bytes are class 0.
+ static PRUint32 ISO2022KR_cls [ 256 / 8 ] = {
192
+ PCK4BITS(2,0,0,0,0,0,0,0), // 00 - 07
193
+ PCK4BITS(0,0,0,0,0,0,0,0), // 08 - 0f
194
+ PCK4BITS(0,0,0,0,0,0,0,0), // 10 - 17
195
+ PCK4BITS(0,0,0,1,0,0,0,0), // 18 - 1f
196
+ PCK4BITS(0,0,0,0,3,0,0,0), // 20 - 27
197
+ PCK4BITS(0,4,0,0,0,0,0,0), // 28 - 2f
198
+ PCK4BITS(0,0,0,0,0,0,0,0), // 30 - 37
199
+ PCK4BITS(0,0,0,0,0,0,0,0), // 38 - 3f
200
+ PCK4BITS(0,0,0,5,0,0,0,0), // 40 - 47
201
+ PCK4BITS(0,0,0,0,0,0,0,0), // 48 - 4f
202
+ PCK4BITS(0,0,0,0,0,0,0,0), // 50 - 57
203
+ PCK4BITS(0,0,0,0,0,0,0,0), // 58 - 5f
204
+ PCK4BITS(0,0,0,0,0,0,0,0), // 60 - 67
205
+ PCK4BITS(0,0,0,0,0,0,0,0), // 68 - 6f
206
+ PCK4BITS(0,0,0,0,0,0,0,0), // 70 - 77
207
+ PCK4BITS(0,0,0,0,0,0,0,0), // 78 - 7f
208
+ PCK4BITS(2,2,2,2,2,2,2,2), // 80 - 87
209
+ PCK4BITS(2,2,2,2,2,2,2,2), // 88 - 8f
210
+ PCK4BITS(2,2,2,2,2,2,2,2), // 90 - 97
211
+ PCK4BITS(2,2,2,2,2,2,2,2), // 98 - 9f
212
+ PCK4BITS(2,2,2,2,2,2,2,2), // a0 - a7
213
+ PCK4BITS(2,2,2,2,2,2,2,2), // a8 - af
214
+ PCK4BITS(2,2,2,2,2,2,2,2), // b0 - b7
215
+ PCK4BITS(2,2,2,2,2,2,2,2), // b8 - bf
216
+ PCK4BITS(2,2,2,2,2,2,2,2), // c0 - c7
217
+ PCK4BITS(2,2,2,2,2,2,2,2), // c8 - cf
218
+ PCK4BITS(2,2,2,2,2,2,2,2), // d0 - d7
219
+ PCK4BITS(2,2,2,2,2,2,2,2), // d8 - df
220
+ PCK4BITS(2,2,2,2,2,2,2,2), // e0 - e7
221
+ PCK4BITS(2,2,2,2,2,2,2,2), // e8 - ef
222
+ PCK4BITS(2,2,2,2,2,2,2,2), // f0 - f7
223
+ PCK4BITS(2,2,2,2,2,2,2,2) // f8 - ff
224
+ };
225
+
226
+
227
// State-transition table of the ISO-2022-KR state machine: 5 words of eight
// packed entries (40 entries); the trailing comment of each row is the
// hexadecimal index range of its entries. An entry is eStart, eError, eItsMe
// or the number of an intermediate state.
// NOTE(review): ISO2022KRSMModel passes 6 next to this table while the table
// holds 40 entries, which is not a multiple of 6 - verify how the state
// machine indexes it before changing either value.
+ static PRUint32 ISO2022KR_st [ 5] = {
228
+ PCK4BITS(eStart, 3,eError,eStart,eStart,eStart,eError,eError),//00-07
229
+ PCK4BITS(eError,eError,eError,eError,eItsMe,eItsMe,eItsMe,eItsMe),//08-0f
230
+ PCK4BITS(eItsMe,eItsMe,eError,eError,eError, 4,eError,eError),//10-17
231
+ PCK4BITS(eError,eError,eError,eError, 5,eError,eError,eError),//18-1f
232
+ PCK4BITS(eError,eError,eError,eItsMe,eStart,eStart,eStart,eStart) //20-27
233
+ };
234
+
235
// Character-length table for ISO-2022-KR: six entries, all zero.
+ static const PRUint32 ISO2022KRCharLenTable[] = {0, 0, 0, 0, 0, 0};
236
+
237
// Model describing the ISO-2022-KR state machine. The initialiser supplies,
// in order: the packed class table (ISO2022KR_cls), the value 6, the packed
// state table (ISO2022KR_st), the character-length table and the charset
// name "ISO-2022-KR".
// NOTE(review): the SMModel field names are declared elsewhere; the 6 is
// presumably the class factor - confirm.
+ SMModel ISO2022KRSMModel = {
238
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_cls },
239
+ 6,
240
+ {eIdxSft4bits, eSftMsk4bits, eBitSft4bits, eUnitMsk4bits, ISO2022KR_st },
241
+ ISO2022KRCharLenTable,
242
+ "ISO-2022-KR",
243
+ };
244
+