@datawrapper/jschardet 3.0.1-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/CONTRIBUTORS +4 -0
  2. package/LICENSE +504 -0
  3. package/README.md +101 -0
  4. package/dist/jschardet.js +7859 -0
  5. package/dist/jschardet.min.js +669 -0
  6. package/index.d.ts +13 -0
  7. package/index.js +1 -0
  8. package/package.json +33 -0
  9. package/src/big5freq.js +925 -0
  10. package/src/big5prober.js +54 -0
  11. package/src/chardistribution.js +301 -0
  12. package/src/charsetgroupprober.js +120 -0
  13. package/src/charsetprober.js +104 -0
  14. package/src/codingstatemachine.js +71 -0
  15. package/src/constants.js +40 -0
  16. package/src/escprober.js +109 -0
  17. package/src/escsm.js +250 -0
  18. package/src/eucjpprober.js +107 -0
  19. package/src/euckrfreq.js +597 -0
  20. package/src/euckrprober.js +54 -0
  21. package/src/euctwfreq.js +429 -0
  22. package/src/euctwprober.js +54 -0
  23. package/src/gb2312freq.js +473 -0
  24. package/src/gb2312prober.js +54 -0
  25. package/src/hebrewprober.js +323 -0
  26. package/src/index.js +56 -0
  27. package/src/jisfreq.js +569 -0
  28. package/src/jpcntx.js +242 -0
  29. package/src/langbulgarianmodel.js +228 -0
  30. package/src/langcyrillicmodel.js +329 -0
  31. package/src/langgreekmodel.js +225 -0
  32. package/src/langhebrewmodel.js +199 -0
  33. package/src/langhungarianmodel.js +225 -0
  34. package/src/langthaimodel.js +200 -0
  35. package/src/latin1prober.js +168 -0
  36. package/src/logger.js +7 -0
  37. package/src/mbcharsetprober.js +99 -0
  38. package/src/mbcsgroupprober.js +64 -0
  39. package/src/mbcssm/big5.js +52 -0
  40. package/src/mbcssm/eucjp.js +54 -0
  41. package/src/mbcssm/euckr.js +51 -0
  42. package/src/mbcssm/euctw.js +55 -0
  43. package/src/mbcssm/gb2312.js +60 -0
  44. package/src/mbcssm/sjis.js +54 -0
  45. package/src/mbcssm/ucs2be.js +56 -0
  46. package/src/mbcssm/ucs2le.js +56 -0
  47. package/src/mbcssm/utf8.js +75 -0
  48. package/src/sbcharsetprober.js +137 -0
  49. package/src/sbcsgroupprober.js +83 -0
  50. package/src/sjisprober.js +105 -0
  51. package/src/universaldetector.js +262 -0
  52. package/src/utf8prober.js +108 -0
@@ -0,0 +1,168 @@
1
+ /*
2
+ * The Original Code is Mozilla Universal charset detector code.
3
+ *
4
+ * The Initial Developer of the Original Code is
5
+ * Netscape Communications Corporation.
6
+ * Portions created by the Initial Developer are Copyright (C) 2001
7
+ * the Initial Developer. All Rights Reserved.
8
+ *
9
+ * Contributor(s):
10
+ * António Afonso (antonio.afonso gmail.com) - port to JavaScript
11
+ * Mark Pilgrim - port to Python
12
+ * Shy Shalom - original C code
13
+ *
14
+ * This library is free software; you can redistribute it and/or
15
+ * modify it under the terms of the GNU Lesser General Public
16
+ * License as published by the Free Software Foundation; either
17
+ * version 2.1 of the License, or (at your option) any later version.
18
+ *
19
+ * This library is distributed in the hope that it will be useful,
20
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
+ * Lesser General Public License for more details.
23
+ *
24
+ * You should have received a copy of the GNU Lesser General Public
25
+ * License along with this library; if not, write to the Free Software
26
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
+ * 02110-1301 USA
28
+ */
29
+
30
+ var CharSetProber = require('./charsetprober');
31
+ var Constants = require('./constants');
32
+
33
+ var UDF = 0; // undefined
34
+ var OTH = 1; // other
35
+ var ASC = 2; // ascii capital letter
36
+ var ASS = 3; // ascii small letter
37
+ var ACV = 4; // accent capital vowel
38
+ var ACO = 5; // accent capital other
39
+ var ASV = 6; // accent small vowel
40
+ var ASO = 7; // accent small other
41
+
42
+ var Latin1_CharToClass = [
43
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 00 - 07
44
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 08 - 0F
45
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 10 - 17
46
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 18 - 1F
47
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 20 - 27
48
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 28 - 2F
49
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 30 - 37
50
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 38 - 3F
51
+ OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 40 - 47
52
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 48 - 4F
53
+ ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 50 - 57
54
+ ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, // 58 - 5F
55
+ OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 60 - 67
56
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 68 - 6F
57
+ ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 70 - 77
58
+ ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, // 78 - 7F
59
+ OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, // 80 - 87
60
+ OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, // 88 - 8F
61
+ UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 90 - 97
62
+ OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, // 98 - 9F
63
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A0 - A7
64
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A8 - AF
65
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B0 - B7
66
+ OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B8 - BF
67
+ ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, // C0 - C7
68
+ ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, // C8 - CF
69
+ ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, // D0 - D7
70
+ ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, // D8 - DF
71
+ ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, // E0 - E7
72
+ ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, // E8 - EF
73
+ ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, // F0 - F7
74
+ ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO // F8 - FF
75
+ ];
76
+
77
+ // 0 : illegal
78
+ // 1 : very unlikely
79
+ // 2 : normal
80
+ // 3 : very likely
81
+ var Latin1ClassModel = [
82
+ // UDF OTH ASC ASS ACV ACO ASV ASO
83
+ 0, 0, 0, 0, 0, 0, 0, 0, // UDF
84
+ 0, 3, 3, 3, 3, 3, 3, 3, // OTH
85
+ 0, 3, 3, 3, 3, 3, 3, 3, // ASC
86
+ 0, 3, 3, 3, 1, 1, 3, 3, // ASS
87
+ 0, 3, 3, 3, 1, 2, 1, 2, // ACV
88
+ 0, 3, 3, 3, 3, 3, 3, 3, // ACO
89
+ 0, 3, 1, 3, 1, 1, 1, 3, // ASV
90
+ 0, 3, 1, 3, 1, 1, 3, 3 // ASO
91
+ ];
92
+
93
/**
 * Prober for windows-1252 text.
 *
 * Every input character is mapped to a character class through
 * Latin1_CharToClass, and each pair of consecutive classes is scored with
 * Latin1ClassModel (0 = illegal, 1 = very unlikely, 2 = normal,
 * 3 = very likely). The number of pairs seen per score drives the confidence.
 *
 * NOTE(review): feed() indexes Latin1_CharToClass with charCodeAt(), so it
 * presumably expects a "binary" string whose char codes are 0x00-0xFF;
 * confirm against the callers.
 */
function Latin1Prober() {
    CharSetProber.apply(this);

    var FREQ_CAT_NUM = 4; // one counter per likelihood score (0..3)
    var CLASS_NUM = 8; // total classes
    var self = this;

    function init() {
        self.reset();
    }

    /**
     * Clears the per-score counters, restarts the class-pair tracking at OTH
     * and then runs the base prober's reset.
     */
    this.reset = function() {
        this._mLastCharClass = OTH;
        this._mFreqCounter = [];
        for( var i = 0; i < FREQ_CAT_NUM; i++ ) {
            this._mFreqCounter[i] = 0;
        }
        Latin1Prober.prototype.reset.apply(this);
    };

    /** @returns {string} the name of the charset this prober detects. */
    this.getCharsetName = function() {
        return "windows-1252";
    };

    /** @returns {string[]} single-element list holding the charset name. */
    this.getSupportedCharsetNames = function() {
        return [this.getCharsetName()];
    };

    /**
     * Scores every consecutive pair of character classes in aBuf.
     *
     * @param {string} aBuf chunk of input to analyse.
     * @returns the prober state after consuming the chunk; an illegal class
     *          pair (score 0) switches the state to Constants.notMe and stops
     *          processing the chunk.
     */
    this.feed = function(aBuf) {
        aBuf = this.filterWithEnglishLetters(aBuf);
        for( var i = 0; i < aBuf.length; i++ ) {
            var c = aBuf.charCodeAt(i);
            var charClass = Latin1_CharToClass[c];
            var freq = Latin1ClassModel[(this._mLastCharClass * CLASS_NUM) + charClass];
            if( freq === 0 ) {
                this._mState = Constants.notMe;
                break;
            }
            this._mFreqCounter[freq]++;
            this._mLastCharClass = charClass;
        }

        return this.getState();
    };

    /**
     * @returns {number} 0.01 once the prober has ruled itself out, otherwise
     *          a value derived from the share of "very likely" pairs minus a
     *          20x penalty for "very unlikely" pairs, floored at 0 and scaled
     *          by 0.95. Returns 0 when no pair has been counted yet.
     */
    this.getConfidence = function() {
        if( this.getState() == Constants.notMe ) {
            return 0.01;
        }

        var total = 0;
        for( var i = 0; i < this._mFreqCounter.length; i++ ) {
            total += this._mFreqCounter[i];
        }

        var confidence;
        if( total < 0.01 ) {
            // Nothing counted yet: report zero instead of dividing by zero.
            confidence = 0.0;
        } else {
            confidence = (this._mFreqCounter[3] / total) - (this._mFreqCounter[1] * 20 / total);
        }
        if( confidence < 0 ) {
            confidence = 0.0;
        }
        // lower the confidence of latin1 so that other more accurate detector
        // can take priority.
        //
        // antonio.afonso: need to change this otherwise languages like pt, es, fr using latin1 will never be detected.
        confidence = confidence * 0.95;
        return confidence;
    };

    init();
}
166
+ Latin1Prober.prototype = new CharSetProber();
167
+
168
+ module.exports = Latin1Prober
package/src/logger.js ADDED
@@ -0,0 +1,7 @@
1
+ // By default, do nothing
2
+ exports.log = function () {};
3
+
4
+ exports.setLogger = function setLogger(loggerFunction) {
5
+ exports.enabled = true;
6
+ exports.log = loggerFunction;
7
+ };
@@ -0,0 +1,99 @@
1
+ /*
2
+ * The Original Code is Mozilla Universal charset detector code.
3
+ *
4
+ * The Initial Developer of the Original Code is
5
+ * Netscape Communications Corporation.
6
+ * Portions created by the Initial Developer are Copyright (C) 2001
7
+ * the Initial Developer. All Rights Reserved.
8
+ *
9
+ * Contributor(s):
10
+ * António Afonso (antonio.afonso gmail.com) - port to JavaScript
11
+ * Mark Pilgrim - port to Python
12
+ * Shy Shalom - original C code
13
+ *
14
+ * This library is free software; you can redistribute it and/or
15
+ * modify it under the terms of the GNU Lesser General Public
16
+ * License as published by the Free Software Foundation; either
17
+ * version 2.1 of the License, or (at your option) any later version.
18
+ *
19
+ * This library is distributed in the hope that it will be useful,
20
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
+ * Lesser General Public License for more details.
23
+ *
24
+ * You should have received a copy of the GNU Lesser General Public
25
+ * License along with this library; if not, write to the Free Software
26
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
+ * 02110-1301 USA
28
+ */
29
+
30
+ var CharSetProber = require('./charsetprober');
31
+ var constants = require('./constants');
32
+ var logger = require('./logger');
33
+
34
/**
 * Base prober for multi-byte encodings.
 *
 * Each byte of the input is pushed through this._mCodingSM; whenever the
 * state machine reports a completed character, the two most recent bytes are
 * handed to this._mDistributionAnalyzer. Both collaborators start out as null
 * here and are presumably assigned by the concrete probers (not visible in
 * this file) before feed() is used.
 */
function MultiByteCharSetProber() {
    CharSetProber.apply(this);

    var self = this;

    function init() {
        self._mDistributionAnalyzer = null;
        self._mCodingSM = null;
        self._mLastChar = ["\x00", "\x00"];
    }

    /**
     * Runs the base reset, resets the state machine and the distribution
     * analyzer when they are set, and clears the carried-over byte pair.
     */
    this.reset = function() {
        MultiByteCharSetProber.prototype.reset.apply(this);
        if( this._mCodingSM ) {
            this._mCodingSM.reset();
        }
        if( this._mDistributionAnalyzer ) {
            this._mDistributionAnalyzer.reset();
        }
        this._mLastChar = ["\x00", "\x00"];
    };

    // Placeholder: returns undefined in this base prober.
    this.getCharsetName = function() {
    };

    /**
     * Consumes one chunk of input.
     *
     * @param aBuf indexable chunk of input exposing length and slice().
     * @returns the prober state after the chunk has been processed.
     */
    this.feed = function(aBuf) {
        var aLen = aBuf.length;
        for( var i = 0; i < aLen; i++ ) {
            var codingState = this._mCodingSM.nextState(aBuf[i]);
            if( codingState == constants.error ) {
                logger.log(this.getCharsetName() + " prober hit error at byte " + i + "\n");
                this._mState = constants.notMe;
                break;
            } else if( codingState == constants.itsMe ) {
                this._mState = constants.foundIt;
                break;
            } else if( codingState == constants.start ) {
                var charLen = this._mCodingSM.getCurrentCharLen();
                if( i == 0 ) {
                    // The character may have started in the previous chunk:
                    // pair its remembered last byte with this chunk's first.
                    this._mLastChar[1] = aBuf[0];
                    this._mDistributionAnalyzer.feed(this._mLastChar.join(''), charLen);
                } else {
                    this._mDistributionAnalyzer.feed(aBuf.slice(i-1,i+1), charLen);
                }
            }
        }

        // Remember the final byte for the next chunk. An empty chunk has no
        // final byte, so the previously remembered one is kept.
        if( aLen > 0 ) {
            this._mLastChar[0] = aBuf[aLen - 1];
        }

        if( this.getState() == constants.detecting ) {
            if( this._mDistributionAnalyzer.gotEnoughData() &&
                this.getConfidence() > constants.SHORTCUT_THRESHOLD ) {
                this._mState = constants.foundIt;
            }
        }

        return this.getState();
    };

    /** @returns the confidence reported by the distribution analyzer. */
    this.getConfidence = function() {
        return this._mDistributionAnalyzer.getConfidence();
    };

    // Give every instance defined (null / zero-filled) fields from the start.
    init();
}
97
+ MultiByteCharSetProber.prototype = new CharSetProber();
98
+
99
+ module.exports = MultiByteCharSetProber
@@ -0,0 +1,64 @@
1
+ /*
2
+ * The Original Code is Mozilla Universal charset detector code.
3
+ *
4
+ * The Initial Developer of the Original Code is
5
+ * Netscape Communications Corporation.
6
+ * Portions created by the Initial Developer are Copyright (C) 2001
7
+ * the Initial Developer. All Rights Reserved.
8
+ *
9
+ * Contributor(s):
10
+ * António Afonso (antonio.afonso gmail.com) - port to JavaScript
11
+ * Mark Pilgrim - port to Python
12
+ * Shy Shalom - original C code
13
+ *
14
+ * This library is free software; you can redistribute it and/or
15
+ * modify it under the terms of the GNU Lesser General Public
16
+ * License as published by the Free Software Foundation; either
17
+ * version 2.1 of the License, or (at your option) any later version.
18
+ *
19
+ * This library is distributed in the hope that it will be useful,
20
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22
+ * Lesser General Public License for more details.
23
+ *
24
+ * You should have received a copy of the GNU Lesser General Public
25
+ * License along with this library; if not, write to the Free Software
26
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
27
+ * 02110-1301 USA
28
+ */
29
+
30
+ var CharSetGroupProber = require('./charsetgroupprober');
31
+ var Big5Prober = require('./big5prober');
32
+ var UTF8Prober = require('./utf8prober');
33
+ var SJISProber = require('./sjisprober');
34
+ var EUCJPProber = require('./eucjpprober');
35
+ var GB2312Prober = require('./gb2312prober');
36
+ var EUCKRProber = require('./euckrprober');
37
+ var EUCTWProber = require('./euctwprober');
38
+
39
/**
 * Group prober that bundles the multi-byte charset probers
 * (UTF-8, Shift_JIS, EUC-JP, GB2312, EUC-KR, Big5 and EUC-TW).
 */
function MBCSGroupProber() {
    CharSetGroupProber.apply(this);
    this._mProbers = [
        new UTF8Prober(),
        new SJISProber(),
        new EUCJPProber(),
        new GB2312Prober(),
        new EUCKRProber(),
        new Big5Prober(),
        new EUCTWProber()
    ];

    // Collected once, in prober order. The previous code stored an
    // un-invoked function here, so callers received a function instead of
    // the list of names.
    const supportedCharsetNames = this._mProbers.map(function(prober) {
        return prober.getCharsetName();
    });

    /** @returns {string[]} charset names of all bundled probers. */
    this.getSupportedCharsetNames = function() {
        return supportedCharsetNames;
    };

    this.reset();
}
62
+ MBCSGroupProber.prototype = new CharSetGroupProber();
63
+
64
+ module.exports = MBCSGroupProber
@@ -0,0 +1,52 @@
1
+ var consts = require('../constants');
2
+
3
+ var BIG5_cls = [
4
+ 1,1,1,1,1,1,1,1, // 00 - 07 //allow 0x00 as legal value
5
+ 1,1,1,1,1,1,0,0, // 08 - 0f
6
+ 1,1,1,1,1,1,1,1, // 10 - 17
7
+ 1,1,1,0,1,1,1,1, // 18 - 1f
8
+ 1,1,1,1,1,1,1,1, // 20 - 27
9
+ 1,1,1,1,1,1,1,1, // 28 - 2f
10
+ 1,1,1,1,1,1,1,1, // 30 - 37
11
+ 1,1,1,1,1,1,1,1, // 38 - 3f
12
+ 2,2,2,2,2,2,2,2, // 40 - 47
13
+ 2,2,2,2,2,2,2,2, // 48 - 4f
14
+ 2,2,2,2,2,2,2,2, // 50 - 57
15
+ 2,2,2,2,2,2,2,2, // 58 - 5f
16
+ 2,2,2,2,2,2,2,2, // 60 - 67
17
+ 2,2,2,2,2,2,2,2, // 68 - 6f
18
+ 2,2,2,2,2,2,2,2, // 70 - 77
19
+ 2,2,2,2,2,2,2,1, // 78 - 7f
20
+ 4,4,4,4,4,4,4,4, // 80 - 87
21
+ 4,4,4,4,4,4,4,4, // 88 - 8f
22
+ 4,4,4,4,4,4,4,4, // 90 - 97
23
+ 4,4,4,4,4,4,4,4, // 98 - 9f
24
+ 4,3,3,3,3,3,3,3, // a0 - a7
25
+ 3,3,3,3,3,3,3,3, // a8 - af
26
+ 3,3,3,3,3,3,3,3, // b0 - b7
27
+ 3,3,3,3,3,3,3,3, // b8 - bf
28
+ 3,3,3,3,3,3,3,3, // c0 - c7
29
+ 3,3,3,3,3,3,3,3, // c8 - cf
30
+ 3,3,3,3,3,3,3,3, // d0 - d7
31
+ 3,3,3,3,3,3,3,3, // d8 - df
32
+ 3,3,3,3,3,3,3,3, // e0 - e7
33
+ 3,3,3,3,3,3,3,3, // e8 - ef
34
+ 3,3,3,3,3,3,3,3, // f0 - f7
35
+ 3,3,3,3,3,3,3,0 // f8 - ff
36
+ ];
37
+
38
+ var BIG5_st = [
39
+ consts.error,consts.start,consts.start, 3,consts.error,consts.error,consts.error,consts.error, //00-07
40
+ consts.error,consts.error,consts.itsMe,consts.itsMe,consts.itsMe,consts.itsMe,consts.itsMe,consts.error, //08-0f
41
+ consts.error,consts.start,consts.start,consts.start,consts.start,consts.start,consts.start,consts.start //10-17
42
+ ];
43
+
44
+ var Big5CharLenTable = [0, 1, 1, 2, 0];
45
+
46
+ module.exports = {
47
+ "classTable" : BIG5_cls,
48
+ "classFactor" : 5,
49
+ "stateTable" : BIG5_st,
50
+ "charLenTable" : Big5CharLenTable,
51
+ "name" : "Big5"
52
+ };
@@ -0,0 +1,54 @@
1
+ var consts = require('../constants');
2
+
3
+ var EUCJP_cls = [
4
+ 4,4,4,4,4,4,4,4, // 00 - 07
5
+ 4,4,4,4,4,4,5,5, // 08 - 0f
6
+ 4,4,4,4,4,4,4,4, // 10 - 17
7
+ 4,4,4,5,4,4,4,4, // 18 - 1f
8
+ 4,4,4,4,4,4,4,4, // 20 - 27
9
+ 4,4,4,4,4,4,4,4, // 28 - 2f
10
+ 4,4,4,4,4,4,4,4, // 30 - 37
11
+ 4,4,4,4,4,4,4,4, // 38 - 3f
12
+ 4,4,4,4,4,4,4,4, // 40 - 47
13
+ 4,4,4,4,4,4,4,4, // 48 - 4f
14
+ 4,4,4,4,4,4,4,4, // 50 - 57
15
+ 4,4,4,4,4,4,4,4, // 58 - 5f
16
+ 4,4,4,4,4,4,4,4, // 60 - 67
17
+ 4,4,4,4,4,4,4,4, // 68 - 6f
18
+ 4,4,4,4,4,4,4,4, // 70 - 77
19
+ 4,4,4,4,4,4,4,4, // 78 - 7f
20
+ 5,5,5,5,5,5,5,5, // 80 - 87
21
+ 5,5,5,5,5,5,1,3, // 88 - 8f
22
+ 5,5,5,5,5,5,5,5, // 90 - 97
23
+ 5,5,5,5,5,5,5,5, // 98 - 9f
24
+ 5,2,2,2,2,2,2,2, // a0 - a7
25
+ 2,2,2,2,2,2,2,2, // a8 - af
26
+ 2,2,2,2,2,2,2,2, // b0 - b7
27
+ 2,2,2,2,2,2,2,2, // b8 - bf
28
+ 2,2,2,2,2,2,2,2, // c0 - c7
29
+ 2,2,2,2,2,2,2,2, // c8 - cf
30
+ 2,2,2,2,2,2,2,2, // d0 - d7
31
+ 2,2,2,2,2,2,2,2, // d8 - df
32
+ 0,0,0,0,0,0,0,0, // e0 - e7
33
+ 0,0,0,0,0,0,0,0, // e8 - ef
34
+ 0,0,0,0,0,0,0,0, // f0 - f7
35
+ 0,0,0,0,0,0,0,5 // f8 - ff
36
+ ];
37
+
38
+ var EUCJP_st = [
39
+ 3, 4, 3, 5,consts.start,consts.error,consts.error,consts.error, //00-07
40
+ consts.error,consts.error,consts.error,consts.error,consts.itsMe,consts.itsMe,consts.itsMe,consts.itsMe, //08-0f
41
+ consts.itsMe,consts.itsMe,consts.start,consts.error,consts.start,consts.error,consts.error,consts.error, //10-17
42
+ consts.error,consts.error,consts.start,consts.error,consts.error,consts.error, 3,consts.error, //18-1f
43
+ 3,consts.error,consts.error,consts.error,consts.start,consts.start,consts.start,consts.start //20-27
44
+ ];
45
+
46
+ var EUCJPCharLenTable = [2, 2, 2, 3, 1, 0];
47
+
48
+ module.exports = {
49
+ "classTable" : EUCJP_cls,
50
+ "classFactor" : 6,
51
+ "stateTable" : EUCJP_st,
52
+ "charLenTable" : EUCJPCharLenTable,
53
+ "name" : "EUC-JP"
54
+ };
@@ -0,0 +1,51 @@
1
+ var consts = require('../constants');
2
+
3
+ var EUCKR_cls = [
4
+ 1,1,1,1,1,1,1,1, // 00 - 07
5
+ 1,1,1,1,1,1,0,0, // 08 - 0f
6
+ 1,1,1,1,1,1,1,1, // 10 - 17
7
+ 1,1,1,0,1,1,1,1, // 18 - 1f
8
+ 1,1,1,1,1,1,1,1, // 20 - 27
9
+ 1,1,1,1,1,1,1,1, // 28 - 2f
10
+ 1,1,1,1,1,1,1,1, // 30 - 37
11
+ 1,1,1,1,1,1,1,1, // 38 - 3f
12
+ 1,1,1,1,1,1,1,1, // 40 - 47
13
+ 1,1,1,1,1,1,1,1, // 48 - 4f
14
+ 1,1,1,1,1,1,1,1, // 50 - 57
15
+ 1,1,1,1,1,1,1,1, // 58 - 5f
16
+ 1,1,1,1,1,1,1,1, // 60 - 67
17
+ 1,1,1,1,1,1,1,1, // 68 - 6f
18
+ 1,1,1,1,1,1,1,1, // 70 - 77
19
+ 1,1,1,1,1,1,1,1, // 78 - 7f
20
+ 0,0,0,0,0,0,0,0, // 80 - 87
21
+ 0,0,0,0,0,0,0,0, // 88 - 8f
22
+ 0,0,0,0,0,0,0,0, // 90 - 97
23
+ 0,0,0,0,0,0,0,0, // 98 - 9f
24
+ 0,2,2,2,2,2,2,2, // a0 - a7
25
+ 2,2,2,2,2,3,3,3, // a8 - af
26
+ 2,2,2,2,2,2,2,2, // b0 - b7
27
+ 2,2,2,2,2,2,2,2, // b8 - bf
28
+ 2,2,2,2,2,2,2,2, // c0 - c7
29
+ 2,3,2,2,2,2,2,2, // c8 - cf
30
+ 2,2,2,2,2,2,2,2, // d0 - d7
31
+ 2,2,2,2,2,2,2,2, // d8 - df
32
+ 2,2,2,2,2,2,2,2, // e0 - e7
33
+ 2,2,2,2,2,2,2,2, // e8 - ef
34
+ 2,2,2,2,2,2,2,2, // f0 - f7
35
+ 2,2,2,2,2,2,2,0 // f8 - ff
36
+ ];
37
+
38
+ var EUCKR_st = [
39
+ consts.error,consts.start, 3,consts.error,consts.error,consts.error,consts.error,consts.error, //00-07
40
+ consts.itsMe,consts.itsMe,consts.itsMe,consts.itsMe,consts.error,consts.error,consts.start,consts.start //08-0f
41
+ ];
42
+
43
+ var EUCKRCharLenTable = [0, 1, 2, 0];
44
+
45
+ module.exports = {
46
+ "classTable" : EUCKR_cls,
47
+ "classFactor" : 4,
48
+ "stateTable" : EUCKR_st,
49
+ "charLenTable" : EUCKRCharLenTable,
50
+ "name" : "EUC-KR"
51
+ };
@@ -0,0 +1,55 @@
1
+ var consts = require('../constants');
2
+
3
+ var EUCTW_cls = [
4
+ 2,2,2,2,2,2,2,2, // 00 - 07
5
+ 2,2,2,2,2,2,0,0, // 08 - 0f
6
+ 2,2,2,2,2,2,2,2, // 10 - 17
7
+ 2,2,2,0,2,2,2,2, // 18 - 1f
8
+ 2,2,2,2,2,2,2,2, // 20 - 27
9
+ 2,2,2,2,2,2,2,2, // 28 - 2f
10
+ 2,2,2,2,2,2,2,2, // 30 - 37
11
+ 2,2,2,2,2,2,2,2, // 38 - 3f
12
+ 2,2,2,2,2,2,2,2, // 40 - 47
13
+ 2,2,2,2,2,2,2,2, // 48 - 4f
14
+ 2,2,2,2,2,2,2,2, // 50 - 57
15
+ 2,2,2,2,2,2,2,2, // 58 - 5f
16
+ 2,2,2,2,2,2,2,2, // 60 - 67
17
+ 2,2,2,2,2,2,2,2, // 68 - 6f
18
+ 2,2,2,2,2,2,2,2, // 70 - 77
19
+ 2,2,2,2,2,2,2,2, // 78 - 7f
20
+ 0,0,0,0,0,0,0,0, // 80 - 87
21
+ 0,0,0,0,0,0,6,0, // 88 - 8f
22
+ 0,0,0,0,0,0,0,0, // 90 - 97
23
+ 0,0,0,0,0,0,0,0, // 98 - 9f
24
+ 0,3,4,4,4,4,4,4, // a0 - a7
25
+ 5,5,1,1,1,1,1,1, // a8 - af
26
+ 1,1,1,1,1,1,1,1, // b0 - b7
27
+ 1,1,1,1,1,1,1,1, // b8 - bf
28
+ 1,1,3,1,3,3,3,3, // c0 - c7
29
+ 3,3,3,3,3,3,3,3, // c8 - cf
30
+ 3,3,3,3,3,3,3,3, // d0 - d7
31
+ 3,3,3,3,3,3,3,3, // d8 - df
32
+ 3,3,3,3,3,3,3,3, // e0 - e7
33
+ 3,3,3,3,3,3,3,3, // e8 - ef
34
+ 3,3,3,3,3,3,3,3, // f0 - f7
35
+ 3,3,3,3,3,3,3,0 // f8 - ff
36
+ ];
37
+
38
+ var EUCTW_st = [
39
+ consts.error,consts.error,consts.start, 3, 3, 3, 4,consts.error, //00-07
40
+ consts.error,consts.error,consts.error,consts.error,consts.error,consts.error,consts.itsMe,consts.itsMe, //08-0f
41
+ consts.itsMe,consts.itsMe,consts.itsMe,consts.itsMe,consts.itsMe,consts.error,consts.start,consts.error, //10-17
42
+ consts.start,consts.start,consts.start,consts.error,consts.error,consts.error,consts.error,consts.error, //18-1f
43
+ 5,consts.error,consts.error,consts.error,consts.start,consts.error,consts.start,consts.start, //20-27
44
+ consts.start,consts.error,consts.start,consts.start,consts.start,consts.start,consts.start,consts.start //28-2f
45
+ ];
46
+
47
+ var EUCTWCharLenTable = [0, 0, 1, 2, 2, 2, 3];
48
+
49
+ module.exports = {
50
+ "classTable" : EUCTW_cls,
51
+ "classFactor" : 7,
52
+ "stateTable" : EUCTW_st,
53
+ "charLenTable" : EUCTWCharLenTable,
54
+ "name" : "x-euc-tw"
55
+ };
@@ -0,0 +1,60 @@
1
+ var consts = require('../constants');
2
+
3
+ var GB2312_cls = [
4
+ 1,1,1,1,1,1,1,1, // 00 - 07
5
+ 1,1,1,1,1,1,0,0, // 08 - 0f
6
+ 1,1,1,1,1,1,1,1, // 10 - 17
7
+ 1,1,1,0,1,1,1,1, // 18 - 1f
8
+ 1,1,1,1,1,1,1,1, // 20 - 27
9
+ 1,1,1,1,1,1,1,1, // 28 - 2f
10
+ 3,3,3,3,3,3,3,3, // 30 - 37
11
+ 3,3,1,1,1,1,1,1, // 38 - 3f
12
+ 2,2,2,2,2,2,2,2, // 40 - 47
13
+ 2,2,2,2,2,2,2,2, // 48 - 4f
14
+ 2,2,2,2,2,2,2,2, // 50 - 57
15
+ 2,2,2,2,2,2,2,2, // 58 - 5f
16
+ 2,2,2,2,2,2,2,2, // 60 - 67
17
+ 2,2,2,2,2,2,2,2, // 68 - 6f
18
+ 2,2,2,2,2,2,2,2, // 70 - 77
19
+ 2,2,2,2,2,2,2,4, // 78 - 7f
20
+ 5,6,6,6,6,6,6,6, // 80 - 87
21
+ 6,6,6,6,6,6,6,6, // 88 - 8f
22
+ 6,6,6,6,6,6,6,6, // 90 - 97
23
+ 6,6,6,6,6,6,6,6, // 98 - 9f
24
+ 6,6,6,6,6,6,6,6, // a0 - a7
25
+ 6,6,6,6,6,6,6,6, // a8 - af
26
+ 6,6,6,6,6,6,6,6, // b0 - b7
27
+ 6,6,6,6,6,6,6,6, // b8 - bf
28
+ 6,6,6,6,6,6,6,6, // c0 - c7
29
+ 6,6,6,6,6,6,6,6, // c8 - cf
30
+ 6,6,6,6,6,6,6,6, // d0 - d7
31
+ 6,6,6,6,6,6,6,6, // d8 - df
32
+ 6,6,6,6,6,6,6,6, // e0 - e7
33
+ 6,6,6,6,6,6,6,6, // e8 - ef
34
+ 6,6,6,6,6,6,6,6, // f0 - f7
35
+ 6,6,6,6,6,6,6,0 // f8 - ff
36
+ ];
37
+
38
+ var GB2312_st = [
39
+ consts.error,consts.start,consts.start,consts.start,consts.start,consts.start, 3,consts.error, //00-07
40
+ consts.error,consts.error,consts.error,consts.error,consts.error,consts.error,consts.itsMe,consts.itsMe, //08-0f
41
+ consts.itsMe,consts.itsMe,consts.itsMe,consts.itsMe,consts.itsMe,consts.error,consts.error,consts.start, //10-17
42
+ 4,consts.error,consts.start,consts.start,consts.error,consts.error,consts.error,consts.error, //18-1f
43
+ consts.error,consts.error, 5,consts.error,consts.error,consts.error,consts.itsMe,consts.error, //20-27
44
+ consts.error,consts.error,consts.start,consts.start,consts.start,consts.start,consts.start,consts.start //28-2f
45
+ ];
46
+
47
+ // To be accurate, the length of class 6 can be either 2 or 4.
48
+ // But it is not necessary to discriminate between the two since
49
+ // it is used for frequency analysis only, and we are validating
50
+ // each code range there as well. So it is safe to set it to be
51
+ // 2 here.
52
+ var GB2312CharLenTable = [0, 1, 1, 1, 1, 1, 2];
53
+
54
+ module.exports = {
55
+ "classTable" : GB2312_cls,
56
+ "classFactor" : 7,
57
+ "stateTable" : GB2312_st,
58
+ "charLenTable" : GB2312CharLenTable,
59
+ "name" : "GB2312"
60
+ };
@@ -0,0 +1,54 @@
1
+ var consts = require('../constants');
2
+
3
+ var SJIS_cls = [
4
+ 1,1,1,1,1,1,1,1, // 00 - 07
5
+ 1,1,1,1,1,1,0,0, // 08 - 0f
6
+ 1,1,1,1,1,1,1,1, // 10 - 17
7
+ 1,1,1,0,1,1,1,1, // 18 - 1f
8
+ 1,1,1,1,1,1,1,1, // 20 - 27
9
+ 1,1,1,1,1,1,1,1, // 28 - 2f
10
+ 1,1,1,1,1,1,1,1, // 30 - 37
11
+ 1,1,1,1,1,1,1,1, // 38 - 3f
12
+ 2,2,2,2,2,2,2,2, // 40 - 47
13
+ 2,2,2,2,2,2,2,2, // 48 - 4f
14
+ 2,2,2,2,2,2,2,2, // 50 - 57
15
+ 2,2,2,2,2,2,2,2, // 58 - 5f
16
+ 2,2,2,2,2,2,2,2, // 60 - 67
17
+ 2,2,2,2,2,2,2,2, // 68 - 6f
18
+ 2,2,2,2,2,2,2,2, // 70 - 77
19
+ 2,2,2,2,2,2,2,1, // 78 - 7f
20
+ 3,3,3,3,3,3,3,3, // 80 - 87
21
+ 3,3,3,3,3,3,3,3, // 88 - 8f
22
+ 3,3,3,3,3,3,3,3, // 90 - 97
23
+ 3,3,3,3,3,3,3,3, // 98 - 9f
24
+ // 0xa0 is illegal in sjis encoding, but some pages do
25
+ // contain such bytes. We need to be more forgiving of errors.
26
+ 2,2,2,2,2,2,2,2, // a0 - a7
27
+ 2,2,2,2,2,2,2,2, // a8 - af
28
+ 2,2,2,2,2,2,2,2, // b0 - b7
29
+ 2,2,2,2,2,2,2,2, // b8 - bf
30
+ 2,2,2,2,2,2,2,2, // c0 - c7
31
+ 2,2,2,2,2,2,2,2, // c8 - cf
32
+ 2,2,2,2,2,2,2,2, // d0 - d7
33
+ 2,2,2,2,2,2,2,2, // d8 - df
34
+ 3,3,3,3,3,3,3,3, // e0 - e7
35
+ 3,3,3,3,3,4,4,4, // e8 - ef
36
+ 3,3,3,3,3,3,3,3, // f0 - f7
37
+ 3,3,3,3,3,0,0,0 // f8 - ff
38
+ ];
39
+
40
+ var SJIS_st = [
41
+ consts.error,consts.start,consts.start, 3,consts.error,consts.error,consts.error,consts.error, //00-07
42
+ consts.error,consts.error,consts.error,consts.error,consts.itsMe,consts.itsMe,consts.itsMe,consts.itsMe, //08-0f
43
+ consts.itsMe,consts.itsMe,consts.error,consts.error,consts.start,consts.start,consts.start,consts.start //10-17
44
+ ];
45
+
46
+ var SJISCharLenTable = [0, 1, 1, 2, 0, 0];
47
+
48
+ module.exports = {
49
+ "classTable" : SJIS_cls,
50
+ "classFactor" : 6,
51
+ "stateTable" : SJIS_st,
52
+ "charLenTable" : SJISCharLenTable,
53
+ "name" : "Shift_JIS"
54
+ };