@loaders.gl/polyfills 4.0.0-alpha.5 → 4.0.0-alpha.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bundle.js +2 -2
- package/dist/dist.min.js +65 -2188
- package/dist/es5/bundle.js +6 -0
- package/dist/es5/bundle.js.map +1 -0
- package/dist/es5/index.js +99 -0
- package/dist/es5/index.js.map +1 -0
- package/dist/es5/lib/encoding-indexes.js +37 -0
- package/dist/es5/lib/encoding-indexes.js.map +1 -0
- package/dist/es5/lib/encoding.js +1214 -0
- package/dist/es5/lib/encoding.js.map +1 -0
- package/dist/es5/libs/encoding-indexes-asian.js +13 -0
- package/dist/es5/node/buffer/btoa.node.js +14 -0
- package/dist/es5/node/buffer/btoa.node.js.map +1 -0
- package/dist/es5/node/buffer/to-array-buffer.node.js +14 -0
- package/dist/es5/node/buffer/to-array-buffer.node.js.map +1 -0
- package/dist/es5/node/fetch/fetch-file.node.js +83 -0
- package/dist/es5/node/fetch/fetch-file.node.js.map +1 -0
- package/dist/es5/node/fetch/fetch.node.js +194 -0
- package/dist/es5/node/fetch/fetch.node.js.map +1 -0
- package/dist/es5/node/fetch/headers.node.js +151 -0
- package/dist/es5/node/fetch/headers.node.js.map +1 -0
- package/dist/es5/node/fetch/response.node.js +182 -0
- package/dist/es5/node/fetch/response.node.js.map +1 -0
- package/dist/es5/node/fetch/utils/decode-data-uri.node.js +58 -0
- package/dist/es5/node/fetch/utils/decode-data-uri.node.js.map +1 -0
- package/dist/es5/node/fetch/utils/stream-utils.node.js +92 -0
- package/dist/es5/node/fetch/utils/stream-utils.node.js.map +1 -0
- package/dist/es5/node/file/blob-stream-controller.js +90 -0
- package/dist/es5/node/file/blob-stream-controller.js.map +1 -0
- package/dist/es5/node/file/blob-stream.js +64 -0
- package/dist/es5/node/file/blob-stream.js.map +1 -0
- package/dist/es5/node/file/blob.js +212 -0
- package/dist/es5/node/file/blob.js.map +1 -0
- package/dist/es5/node/file/file-reader.js +153 -0
- package/dist/es5/node/file/file-reader.js.map +1 -0
- package/dist/es5/node/file/file.js +44 -0
- package/dist/es5/node/file/file.js.map +1 -0
- package/dist/es5/node/file/install-file-polyfills.js +25 -0
- package/dist/es5/node/file/install-file-polyfills.js.map +1 -0
- package/dist/es5/node/file/readable-stream.js +27 -0
- package/dist/es5/node/file/readable-stream.js.map +1 -0
- package/dist/es5/node/images/encode-image.node.js +30 -0
- package/dist/es5/node/images/encode-image.node.js.map +1 -0
- package/dist/es5/node/images/parse-image.node.js +64 -0
- package/dist/es5/node/images/parse-image.node.js.map +1 -0
- package/dist/es5/promise/all-settled.js +28 -0
- package/dist/es5/promise/all-settled.js.map +1 -0
- package/dist/es5/utils/assert.js +12 -0
- package/dist/es5/utils/assert.js.map +1 -0
- package/dist/es5/utils/globals.js +18 -0
- package/dist/es5/utils/globals.js.map +1 -0
- package/dist/esm/bundle.js +4 -0
- package/dist/esm/bundle.js.map +1 -0
- package/dist/esm/index.js +50 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/lib/encoding-indexes.js +30 -0
- package/dist/esm/lib/encoding-indexes.js.map +1 -0
- package/dist/esm/lib/encoding.js +1206 -0
- package/dist/esm/lib/encoding.js.map +1 -0
- package/dist/esm/libs/encoding-indexes-asian.js +13 -0
- package/dist/esm/node/buffer/btoa.node.js +7 -0
- package/dist/esm/node/buffer/btoa.node.js.map +1 -0
- package/dist/esm/node/buffer/to-array-buffer.node.js +8 -0
- package/dist/esm/node/buffer/to-array-buffer.node.js.map +1 -0
- package/dist/esm/node/fetch/fetch-file.node.js +50 -0
- package/dist/esm/node/fetch/fetch-file.node.js.map +1 -0
- package/dist/esm/node/fetch/fetch.node.js +126 -0
- package/dist/esm/node/fetch/fetch.node.js.map +1 -0
- package/dist/esm/node/fetch/headers.node.js +102 -0
- package/dist/esm/node/fetch/headers.node.js.map +1 -0
- package/dist/esm/node/fetch/response.node.js +67 -0
- package/dist/esm/node/fetch/response.node.js.map +1 -0
- package/dist/esm/node/fetch/utils/decode-data-uri.node.js +45 -0
- package/dist/esm/node/fetch/utils/decode-data-uri.node.js.map +1 -0
- package/dist/esm/node/fetch/utils/stream-utils.node.js +43 -0
- package/dist/esm/node/fetch/utils/stream-utils.node.js.map +1 -0
- package/dist/esm/node/file/blob-stream-controller.js +44 -0
- package/dist/esm/node/file/blob-stream-controller.js.map +1 -0
- package/dist/esm/node/file/blob-stream.js +20 -0
- package/dist/esm/node/file/blob-stream.js.map +1 -0
- package/dist/esm/node/file/blob.js +120 -0
- package/dist/esm/node/file/blob.js.map +1 -0
- package/dist/esm/node/file/file-reader.js +60 -0
- package/dist/esm/node/file/file-reader.js.map +1 -0
- package/dist/esm/node/file/file.js +19 -0
- package/dist/esm/node/file/file.js.map +1 -0
- package/dist/esm/node/file/install-file-polyfills.js +19 -0
- package/dist/esm/node/file/install-file-polyfills.js.map +1 -0
- package/dist/esm/node/file/readable-stream.js +4 -0
- package/dist/esm/node/file/readable-stream.js.map +1 -0
- package/dist/esm/node/images/encode-image.node.js +20 -0
- package/dist/esm/node/images/encode-image.node.js.map +1 -0
- package/dist/esm/node/images/parse-image.node.js +29 -0
- package/dist/esm/node/images/parse-image.node.js.map +1 -0
- package/dist/esm/promise/all-settled.js +19 -0
- package/dist/esm/promise/all-settled.js.map +1 -0
- package/dist/esm/utils/assert.js +6 -0
- package/dist/esm/utils/assert.js.map +1 -0
- package/dist/esm/utils/globals.js +9 -0
- package/dist/esm/utils/globals.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +84 -50
- package/dist/lib/encoding-indexes.js +34 -29
- package/dist/lib/encoding.js +2610 -1281
- package/dist/libs/encoding-indexes-asian.d.ts +1 -1
- package/dist/libs/encoding-indexes-asian.js +9 -8
- package/dist/node/buffer/btoa.node.js +12 -5
- package/dist/node/buffer/to-array-buffer.node.js +11 -8
- package/dist/node/fetch/fetch-file.node.d.ts +4 -0
- package/dist/node/fetch/fetch-file.node.d.ts.map +1 -0
- package/dist/node/fetch/fetch-file.node.js +51 -0
- package/dist/node/fetch/fetch.node.d.ts +6 -1
- package/dist/node/fetch/fetch.node.d.ts.map +1 -1
- package/dist/node/fetch/fetch.node.js +128 -111
- package/dist/node/fetch/headers.node.d.ts +1 -1
- package/dist/node/fetch/headers.node.d.ts.map +1 -1
- package/dist/node/fetch/headers.node.js +95 -114
- package/dist/node/fetch/response.node.d.ts +2 -2
- package/dist/node/fetch/response.node.d.ts.map +1 -1
- package/dist/node/fetch/response.node.js +72 -84
- package/dist/node/fetch/utils/decode-data-uri.node.js +63 -53
- package/dist/node/fetch/utils/stream-utils.node.d.ts +8 -1
- package/dist/node/fetch/utils/stream-utils.node.d.ts.map +1 -1
- package/dist/node/fetch/utils/stream-utils.node.js +68 -93
- package/dist/node/file/blob-stream-controller.js +59 -52
- package/dist/node/file/blob-stream.js +36 -25
- package/dist/node/file/blob.js +151 -131
- package/dist/node/file/file-reader.js +28 -77
- package/dist/node/file/file.js +36 -25
- package/dist/node/file/install-file-polyfills.js +26 -21
- package/dist/node/file/readable-stream.js +10 -3
- package/dist/node/images/encode-image.node.js +38 -17
- package/dist/node/images/parse-image.node.d.ts +3 -1
- package/dist/node/images/parse-image.node.d.ts.map +1 -1
- package/dist/node/images/parse-image.node.js +40 -19
- package/dist/promise/all-settled.js +22 -17
- package/dist/utils/assert.js +8 -5
- package/dist/utils/globals.js +34 -7
- package/package.json +5 -5
- package/src/index.ts +7 -13
- package/src/node/fetch/fetch-file.node.ts +51 -0
- package/src/node/fetch/fetch.node.ts +64 -30
- package/src/node/fetch/headers.node.ts +1 -1
- package/src/node/fetch/response.node.ts +4 -2
- package/src/node/fetch/utils/stream-utils.node.ts +10 -58
- package/src/node/images/parse-image.node.ts +35 -20
- package/dist/bundle.js.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/lib/encoding-indexes.js.map +0 -1
- package/dist/lib/encoding.js.map +0 -1
- package/dist/node/buffer/btoa.node.js.map +0 -1
- package/dist/node/buffer/to-array-buffer.node.js.map +0 -1
- package/dist/node/fetch/fetch.node.js.map +0 -1
- package/dist/node/fetch/headers.node.js.map +0 -1
- package/dist/node/fetch/response.node.js.map +0 -1
- package/dist/node/fetch/utils/decode-data-uri.node.js.map +0 -1
- package/dist/node/fetch/utils/stream-utils.node.js.map +0 -1
- package/dist/node/file/blob-stream-controller.js.map +0 -1
- package/dist/node/file/blob-stream.js.map +0 -1
- package/dist/node/file/blob.js.map +0 -1
- package/dist/node/file/file-reader.js.map +0 -1
- package/dist/node/file/file.js.map +0 -1
- package/dist/node/file/install-file-polyfills.js.map +0 -1
- package/dist/node/file/readable-stream.js.map +0 -1
- package/dist/node/images/encode-image.node.js.map +0 -1
- package/dist/node/images/parse-image.node.js.map +0 -1
- package/dist/promise/all-settled.js.map +0 -1
- package/dist/utils/assert.js.map +0 -1
- package/dist/utils/globals.js.map +0 -1
package/dist/lib/encoding.js
CHANGED
|
@@ -1,1450 +1,2779 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
"use strict";
|
|
2
|
+
// @ts-nocheck
|
|
3
|
+
/* eslint-disable */
|
|
4
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
5
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
6
|
+
};
|
|
7
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
8
|
+
exports.TextDecoder = exports.TextEncoder = void 0;
|
|
9
|
+
// Copied from https://github.com/inexorabletash/text-encoding/blob/b4e5bc26e26e51f56e3daa9f13138c79f49d3c34/lib/encoding.js
|
|
10
|
+
// This is free and unencumbered software released into the public domain.
|
|
11
|
+
// See LICENSE.md for more information.
|
|
12
|
+
const encoding_indexes_1 = __importDefault(require("./encoding-indexes"));
|
|
13
|
+
// Note: Asian character indices add half a megabyte to bundle. Ignore, since we really only want the built-in UTF8...
|
|
14
|
+
// import indexes from './encoding-indexes-asian';
|
|
15
|
+
global['encoding-indexes'] = encoding_indexes_1.default || {};
|
|
16
|
+
//
|
|
17
|
+
// Utilities
|
|
18
|
+
//
|
|
19
|
+
/**
 * Tests whether a number lies inside a closed interval.
 *
 * @param {number} a The number to test.
 * @param {number} min The minimum value in the range, inclusive.
 * @param {number} max The maximum value in the range, inclusive.
 * @return {boolean} True if a >= min and a <= max.
 */
function inRange(a, min, max) {
  return min <= a && a <= max;
}
|
|
7
|
-
|
|
28
|
+
/**
 * Reports whether an item occurs in an array, using the strict-equality
 * matching of Array.prototype.indexOf.
 *
 * @param {!Array.<*>} array The array to check.
 * @param {*} item The item to look for in the array.
 * @return {boolean} True if the item appears in the array.
 */
function includes(array, item) {
  return array.indexOf(item) !== -1;
}
|
|
11
|
-
|
|
12
36
|
// Local shorthand for Math.floor.
var floor = Math.floor;
|
|
13
|
-
|
|
37
|
+
/**
 * Coerces an optional argument into an object that can be read as a
 * dictionary of options.
 *
 * @param {*} o The value to convert.
 * @return {Object} A new empty object when o is undefined, otherwise o
 *     itself when it is an object (or function).
 * @throws {TypeError} If o is null or any other primitive.
 */
function ToDictionary(o) {
  if (o === undefined) {
    return {};
  }
  // Object(o) returns o unchanged only when o is already an object.
  if (o === Object(o)) {
    return o;
  }
  throw new TypeError('Could not convert argument to dictionary');
}
|
|
19
|
-
|
|
48
|
+
/**
 * Converts a string into an array of Unicode code points, following the
 * WebIDL "obtain Unicode" steps
 * (https://heycam.github.io/webidl/#dfn-obtain-unicode). A valid surrogate
 * pair becomes one code point; every unpaired surrogate becomes U+FFFD.
 *
 * @param {string} string Input string of UTF-16 code units.
 * @return {!Array.<number>} Code points.
 */
function stringToCodePoints(string) {
  // 1. Let S be the DOMString value.
  var s = String(string);
  // 2. Let n be the length of S.
  var n = s.length;
  // 3. Initialize i to 0.
  var i = 0;
  // 4. Initialize U to be an empty sequence of Unicode characters.
  var u = [];
  // 5. While i < n:
  while (i < n) {
    // 1. Let c be the code unit in S at index i.
    var c = s.charCodeAt(i);
    // 2. Depending on the value of c:
    if (c < 0xd800 || c > 0xdfff) {
      // c < 0xD800 or c > 0xDFFF: not a surrogate.
      // Append to U the Unicode character with code point c.
      u.push(c);
    } else if (0xdc00 <= c && c <= 0xdfff) {
      // 0xDC00 <= c <= 0xDFFF: low surrogate without a preceding high one.
      // Append to U a U+FFFD REPLACEMENT CHARACTER.
      u.push(0xfffd);
    } else if (0xd800 <= c && c <= 0xdbff) {
      // 0xD800 <= c <= 0xDBFF: high surrogate.
      if (i === n - 1) {
        // 1. If i = n-1, then append to U a U+FFFD REPLACEMENT CHARACTER.
        u.push(0xfffd);
      } else {
        // 2. Otherwise, i < n-1:
        // 1. Let d be the code unit in S at index i+1.
        var d = s.charCodeAt(i + 1);
        // 2. If 0xDC00 <= d <= 0xDFFF, then:
        if (0xdc00 <= d && d <= 0xdfff) {
          // 1. Let a be c & 0x3FF.
          var a = c & 0x3ff;
          // 2. Let b be d & 0x3FF.
          var b = d & 0x3ff;
          // 3. Append to U the Unicode character with code point
          //    2^16+2^10*a+b.
          u.push(0x10000 + (a << 10) + b);
          // 4. Set i to i+1 (skip the low surrogate just consumed).
          i += 1;
        } else {
          // 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a
          //    U+FFFD REPLACEMENT CHARACTER.
          u.push(0xfffd);
        }
      }
    }
    // 3. Set i to i+1.
    i += 1;
  }
  // 6. Return U.
  return u;
}
|
|
55
|
-
|
|
113
|
+
/**
 * Builds a string from an array of code points. Code points up to U+FFFF
 * are emitted as a single UTF-16 code unit; larger ones are split into a
 * high/low surrogate pair.
 *
 * @param {!Array.<number>} code_points Array of code points.
 * @return {string} string String of UTF-16 code units.
 */
function codePointsToString(code_points) {
  var s = '';
  for (var i = 0; i < code_points.length; ++i) {
    var cp = code_points[i];
    if (cp <= 0xffff) {
      s += String.fromCharCode(cp);
    } else {
      // Offset into the supplementary planes, then take the top 10 bits for
      // the high surrogate and the bottom 10 bits for the low surrogate.
      cp -= 0x10000;
      s += String.fromCharCode((cp >> 10) + 0xd800, (cp & 0x3ff) + 0xdc00);
    }
  }
  return s;
}
|
|
72
|
-
|
|
131
|
+
//
|
|
132
|
+
// Implementation of Encoding specification
|
|
133
|
+
// https://encoding.spec.whatwg.org/
|
|
134
|
+
//
|
|
135
|
+
//
|
|
136
|
+
// 4. Terminology
|
|
137
|
+
//
|
|
138
|
+
/**
 * An ASCII byte is a byte in the range 0x00 to 0x7F, inclusive.
 *
 * @param {number} a The number to test.
 * @return {boolean} True if a is in the range 0x00 to 0x7F, inclusive.
 */
function isASCIIByte(a) {
  return 0x00 <= a && a <= 0x7f;
}

/**
 * An ASCII code point is a code point in the range U+0000 to
 * U+007F, inclusive. The numeric test is the same as for ASCII bytes, so
 * this is an alias of isASCIIByte.
 */
var isASCIICodePoint = isASCIIByte;
|
|
78
|
-
|
|
79
|
-
|
|
151
|
+
/**
 * End-of-stream is a special token that signifies no more tokens
 * are in the stream.
 * @const
 */
var end_of_stream = -1;

/**
 * A stream represents an ordered sequence of tokens.
 *
 * The tokens are copied into a plain array and stored in reverse order, so
 * the front of the stream is the end of the array.
 *
 * @constructor
 * @param {!(Array.<number>|Uint8Array)} tokens Array of tokens that provide
 *     the stream.
 */
function Stream(tokens) {
  /** @type {!Array.<number>} */
  this.tokens = [].slice.call(tokens);
  // Reversed as push/pop is more efficient than shift/unshift.
  this.tokens.reverse();
}

Stream.prototype = {
  /**
   * @return {boolean} True if end-of-stream has been hit.
   */
  endOfStream: function () {
    return !this.tokens.length;
  },

  /**
   * When a token is read from a stream, the first token in the
   * stream must be returned and subsequently removed, and
   * end-of-stream must be returned otherwise.
   *
   * @return {number} Get the next token from the stream, or
   *     end_of_stream.
   */
  read: function () {
    if (!this.tokens.length) {
      return end_of_stream;
    }
    return this.tokens.pop();
  },

  /**
   * When one or more tokens are prepended to a stream, those tokens
   * must be inserted, in given order, before the first token in the
   * stream.
   *
   * The argument is only read; an array passed in is left untouched.
   *
   * @param {(number|!Array.<number>)} token The token(s) to prepend to the
   *     stream.
   */
  prepend: function (token) {
    if (Array.isArray(token)) {
      // Storage is reversed, so append from last to first to keep the given
      // order at the front of the stream.
      for (var i = token.length - 1; i >= 0; --i) {
        this.tokens.push(token[i]);
      }
    } else {
      this.tokens.push(token);
    }
  },

  /**
   * When one or more tokens are pushed to a stream, those tokens
   * must be inserted, in given order, after the last token in the
   * stream.
   *
   * The argument is only read; an array passed in is left untouched.
   *
   * @param {(number|!Array.<number>)} token The tokens(s) to push to the
   *     stream.
   */
  push: function (token) {
    if (Array.isArray(token)) {
      // The back of the stream is the start of the reversed storage array.
      for (var i = 0; i < token.length; ++i) {
        this.tokens.unshift(token[i]);
      }
    } else {
      this.tokens.unshift(token);
    }
  }
};
|
|
226
|
+
//
|
|
227
|
+
// 5. Encodings
|
|
228
|
+
//
|
|
229
|
+
// 5.1 Encoders and decoders
|
|
230
|
+
/**
 * Sentinel value a Decoder or Encoder handler may return to signal that it
 * has finished.
 * @const
 */
var finished = -1;
|
|
113
|
-
|
|
232
|
+
/**
 * Reports a decoding error: throws in fatal mode, otherwise yields the code
 * point to substitute for the undecodable input.
 *
 * @param {boolean} fatal If true, decoding errors raise an exception.
 * @param {number=} opt_code_point Override the standard fallback code point.
 * @return {number} The code point to insert on a decoding error.
 * @throws {TypeError} If fatal is truthy.
 */
function decoderError(fatal, opt_code_point) {
  if (fatal) {
    throw new TypeError('Decoder error');
  }
  // Any falsy override (including 0) falls back to U+FFFD REPLACEMENT
  // CHARACTER.
  return opt_code_point || 0xfffd;
}
|
|
118
|
-
|
|
242
|
+
/**
 * Reports a code point that the current encoding cannot represent.
 *
 * @param {number} code_point The code point that could not be encoded.
 * @return {number} Always throws, no value is actually returned.
 * @throws {TypeError} Always.
 */
function encoderError(code_point) {
  throw new TypeError('The code point ' + code_point + ' could not be encoded.');
}
|
|
122
|
-
|
|
123
|
-
function Decoder() {}
|
|
124
|
-
|
|
249
|
+
/**
 * Interface declaration for decoders; the handler here is an empty
 * placeholder that only documents the expected signature.
 * @interface
 */
function Decoder() { }
Decoder.prototype = {
  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point, or |finished|.
   */
  handler: function (stream, bite) { }
};
|
|
128
|
-
|
|
129
|
-
function Encoder() {}
|
|
130
|
-
|
|
261
|
+
/**
 * Interface declaration for encoders; the handler here is an empty
 * placeholder that only documents the expected signature.
 * @interface
 */
function Encoder() { }
Encoder.prototype = {
  /**
   * @param {Stream} stream The stream of code points being encoded.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit, or |finished|.
   */
  handler: function (stream, code_point) { }
};
|
|
134
|
-
|
|
271
|
+
// 5.2 Names and labels
|
|
272
|
+
// TODO: Define @typedef for Encoding: {name:string,labels:Array.<string>}
|
|
273
|
+
// https://github.com/google/closure-compiler/issues/247
|
|
274
|
+
/**
 * Looks up an encoding by label in the label_to_encoding registry.
 *
 * @param {string} label The encoding label.
 * @return {?{name:string,labels:Array.<string>}} The matching encoding, or
 *     null when the label is not registered.
 */
function getEncoding(label) {
  // 1. Remove any leading and trailing whitespace from label. The label is
  // also lower-cased here so the lookup below is case-insensitive.
  label = String(label).trim().toLowerCase();
  // 2. If label is an ASCII case-insensitive match for any of the
  // labels listed in the table below, return the corresponding
  // encoding, and failure otherwise.
  // Own-property check so inherited Object.prototype keys never match.
  if (Object.prototype.hasOwnProperty.call(label_to_encoding, label)) {
    return label_to_encoding[label];
  }
  return null;
}
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
289
|
+
/**
 * Encodings table: https://encoding.spec.whatwg.org/encodings.json
 *
 * Each category groups encodings under a heading; each encoding lists its
 * canonical name and the lower-case labels that select it.
 *
 * @const
 * @type {!Array.<{
 *          heading: string,
 *          encodings: Array.<{name:string,labels:Array.<string>}>
 *        }>}
 */
var encodings = [
  {
    encodings: [
      {
        labels: ['unicode-1-1-utf-8', 'utf-8', 'utf8'],
        name: 'UTF-8'
      }
    ],
    heading: 'The Encoding'
  },
  {
    encodings: [
      {
        labels: ['866', 'cp866', 'csibm866', 'ibm866'],
        name: 'IBM866'
      },
      {
        labels: [
          'csisolatin2',
          'iso-8859-2',
          'iso-ir-101',
          'iso8859-2',
          'iso88592',
          'iso_8859-2',
          'iso_8859-2:1987',
          'l2',
          'latin2'
        ],
        name: 'ISO-8859-2'
      },
      {
        labels: [
          'csisolatin3',
          'iso-8859-3',
          'iso-ir-109',
          'iso8859-3',
          'iso88593',
          'iso_8859-3',
          'iso_8859-3:1988',
          'l3',
          'latin3'
        ],
        name: 'ISO-8859-3'
      },
      {
        labels: [
          'csisolatin4',
          'iso-8859-4',
          'iso-ir-110',
          'iso8859-4',
          'iso88594',
          'iso_8859-4',
          'iso_8859-4:1988',
          'l4',
          'latin4'
        ],
        name: 'ISO-8859-4'
      },
      {
        labels: [
          'csisolatincyrillic',
          'cyrillic',
          'iso-8859-5',
          'iso-ir-144',
          'iso8859-5',
          'iso88595',
          'iso_8859-5',
          'iso_8859-5:1988'
        ],
        name: 'ISO-8859-5'
      },
      {
        labels: [
          'arabic',
          'asmo-708',
          'csiso88596e',
          'csiso88596i',
          'csisolatinarabic',
          'ecma-114',
          'iso-8859-6',
          'iso-8859-6-e',
          'iso-8859-6-i',
          'iso-ir-127',
          'iso8859-6',
          'iso88596',
          'iso_8859-6',
          'iso_8859-6:1987'
        ],
        name: 'ISO-8859-6'
      },
      {
        labels: [
          'csisolatingreek',
          'ecma-118',
          'elot_928',
          'greek',
          'greek8',
          'iso-8859-7',
          'iso-ir-126',
          'iso8859-7',
          'iso88597',
          'iso_8859-7',
          'iso_8859-7:1987',
          'sun_eu_greek'
        ],
        name: 'ISO-8859-7'
      },
      {
        labels: [
          'csiso88598e',
          'csisolatinhebrew',
          'hebrew',
          'iso-8859-8',
          'iso-8859-8-e',
          'iso-ir-138',
          'iso8859-8',
          'iso88598',
          'iso_8859-8',
          'iso_8859-8:1988',
          'visual'
        ],
        name: 'ISO-8859-8'
      },
      {
        labels: ['csiso88598i', 'iso-8859-8-i', 'logical'],
        name: 'ISO-8859-8-I'
      },
      {
        labels: [
          'csisolatin6',
          'iso-8859-10',
          'iso-ir-157',
          'iso8859-10',
          'iso885910',
          'l6',
          'latin6'
        ],
        name: 'ISO-8859-10'
      },
      {
        labels: ['iso-8859-13', 'iso8859-13', 'iso885913'],
        name: 'ISO-8859-13'
      },
      {
        labels: ['iso-8859-14', 'iso8859-14', 'iso885914'],
        name: 'ISO-8859-14'
      },
      {
        labels: ['csisolatin9', 'iso-8859-15', 'iso8859-15', 'iso885915', 'iso_8859-15', 'l9'],
        name: 'ISO-8859-15'
      },
      {
        labels: ['iso-8859-16'],
        name: 'ISO-8859-16'
      },
      {
        labels: ['cskoi8r', 'koi', 'koi8', 'koi8-r', 'koi8_r'],
        name: 'KOI8-R'
      },
      {
        labels: ['koi8-ru', 'koi8-u'],
        name: 'KOI8-U'
      },
      {
        labels: ['csmacintosh', 'mac', 'macintosh', 'x-mac-roman'],
        name: 'macintosh'
      },
      {
        labels: ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620', 'windows-874'],
        name: 'windows-874'
      },
      {
        labels: ['cp1250', 'windows-1250', 'x-cp1250'],
        name: 'windows-1250'
      },
      {
        labels: ['cp1251', 'windows-1251', 'x-cp1251'],
        name: 'windows-1251'
      },
      {
        labels: [
          'ansi_x3.4-1968',
          'ascii',
          'cp1252',
          'cp819',
          'csisolatin1',
          'ibm819',
          'iso-8859-1',
          'iso-ir-100',
          'iso8859-1',
          'iso88591',
          'iso_8859-1',
          'iso_8859-1:1987',
          'l1',
          'latin1',
          'us-ascii',
          'windows-1252',
          'x-cp1252'
        ],
        name: 'windows-1252'
      },
      {
        labels: ['cp1253', 'windows-1253', 'x-cp1253'],
        name: 'windows-1253'
      },
      {
        labels: [
          'cp1254',
          'csisolatin5',
          'iso-8859-9',
          'iso-ir-148',
          'iso8859-9',
          'iso88599',
          'iso_8859-9',
          'iso_8859-9:1989',
          'l5',
          'latin5',
          'windows-1254',
          'x-cp1254'
        ],
        name: 'windows-1254'
      },
      {
        labels: ['cp1255', 'windows-1255', 'x-cp1255'],
        name: 'windows-1255'
      },
      {
        labels: ['cp1256', 'windows-1256', 'x-cp1256'],
        name: 'windows-1256'
      },
      {
        labels: ['cp1257', 'windows-1257', 'x-cp1257'],
        name: 'windows-1257'
      },
      {
        labels: ['cp1258', 'windows-1258', 'x-cp1258'],
        name: 'windows-1258'
      },
      {
        labels: ['x-mac-cyrillic', 'x-mac-ukrainian'],
        name: 'x-mac-cyrillic'
      }
    ],
    heading: 'Legacy single-byte encodings'
  },
  {
    encodings: [
      {
        labels: [
          'chinese',
          'csgb2312',
          'csiso58gb231280',
          'gb2312',
          'gb_2312',
          'gb_2312-80',
          'gbk',
          'iso-ir-58',
          'x-gbk'
        ],
        name: 'GBK'
      },
      {
        labels: ['gb18030'],
        name: 'gb18030'
      }
    ],
    heading: 'Legacy multi-byte Chinese (simplified) encodings'
  },
  {
    encodings: [
      {
        labels: ['big5', 'big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
        name: 'Big5'
      }
    ],
    heading: 'Legacy multi-byte Chinese (traditional) encodings'
  },
  {
    encodings: [
      {
        labels: ['cseucpkdfmtjapanese', 'euc-jp', 'x-euc-jp'],
        name: 'EUC-JP'
      },
      {
        labels: ['csiso2022jp', 'iso-2022-jp'],
        name: 'ISO-2022-JP'
      },
      {
        labels: [
          'csshiftjis',
          'ms932',
          'ms_kanji',
          'shift-jis',
          'shift_jis',
          'sjis',
          'windows-31j',
          'x-sjis'
        ],
        name: 'Shift_JIS'
      }
    ],
    heading: 'Legacy multi-byte Japanese encodings'
  },
  {
    encodings: [
      {
        labels: [
          'cseuckr',
          'csksc56011987',
          'euc-kr',
          'iso-ir-149',
          'korean',
          'ks_c_5601-1987',
          'ks_c_5601-1989',
          'ksc5601',
          'ksc_5601',
          'windows-949'
        ],
        name: 'EUC-KR'
      }
    ],
    heading: 'Legacy multi-byte Korean encodings'
  },
  {
    encodings: [
      {
        labels: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
        name: 'replacement'
      },
      {
        labels: ['utf-16be'],
        name: 'UTF-16BE'
      },
      {
        labels: ['utf-16', 'utf-16le'],
        name: 'UTF-16LE'
      },
      {
        labels: ['x-user-defined'],
        name: 'x-user-defined'
      }
    ],
    heading: 'Legacy miscellaneous encodings'
  }
];

// Label to encoding registry: every label in the table above is mapped to
// the encoding object that lists it.
/** @type {Object.<string,{name:string,labels:Array.<string>}>} */
var label_to_encoding = {};
encodings.forEach(function (category) {
  category.encodings.forEach(function (encoding) {
    encoding.labels.forEach(function (label) {
      label_to_encoding[label] = encoding;
    });
  });
});
|
|
652
|
+
// Registry of of encoder/decoder factories, by encoding name.
|
|
653
|
+
/** @type {Object.<string, function({fatal:boolean}): Encoder>} */
|
|
295
654
|
var encoders = {};
|
|
655
|
+
/** @type {Object.<string, function({fatal:boolean}): Decoder>} */
|
|
296
656
|
var decoders = {};
|
|
297
|
-
|
|
657
|
+
//
|
|
658
|
+
// 6. Indexes
|
|
659
|
+
//
|
|
660
|
+
/**
|
|
661
|
+
* @param {number} pointer The |pointer| to search for.
|
|
662
|
+
* @param {(!Array.<?number>|undefined)} index The |index| to search within.
|
|
663
|
+
* @return {?number} The code point corresponding to |pointer| in |index|,
|
|
664
|
+
* or null if |code point| is not in |index|.
|
|
665
|
+
*/
|
|
298
666
|
function indexCodePointFor(pointer, index) {
|
|
299
|
-
|
|
300
|
-
|
|
667
|
+
if (!index)
|
|
668
|
+
return null;
|
|
669
|
+
return index[pointer] || null;
|
|
301
670
|
}
|
|
302
|
-
|
|
671
|
+
/**
|
|
672
|
+
* @param {number} code_point The |code point| to search for.
|
|
673
|
+
* @param {!Array.<?number>} index The |index| to search within.
|
|
674
|
+
* @return {?number} The first pointer corresponding to |code point| in
|
|
675
|
+
* |index|, or null if |code point| is not in |index|.
|
|
676
|
+
*/
|
|
303
677
|
function indexPointerFor(code_point, index) {
|
|
304
|
-
|
|
305
|
-
|
|
678
|
+
var pointer = index.indexOf(code_point);
|
|
679
|
+
return pointer === -1 ? null : pointer;
|
|
306
680
|
}
|
|
307
|
-
|
|
681
|
+
/**
|
|
682
|
+
* @param {string} name Name of the index.
|
|
683
|
+
* @return {(!Array.<number>|!Array.<Array.<number>>)}
|
|
684
|
+
* */
|
|
308
685
|
function index(name) {
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
return global['encoding-indexes'][name];
|
|
686
|
+
if (!('encoding-indexes' in global)) {
|
|
687
|
+
throw Error('Indexes missing.' + ' Did you forget to include encoding-indexes.js first?');
|
|
688
|
+
}
|
|
689
|
+
return global['encoding-indexes'][name];
|
|
314
690
|
}
|
|
315
|
-
|
|
691
|
+
/**
|
|
692
|
+
* @param {number} pointer The |pointer| to search for in the gb18030 index.
|
|
693
|
+
* @return {?number} The code point corresponding to |pointer| in |index|,
|
|
694
|
+
* or null if |code point| is not in the gb18030 index.
|
|
695
|
+
*/
|
|
316
696
|
function indexGB18030RangesCodePointFor(pointer) {
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
697
|
+
// 1. If pointer is greater than 39419 and less than 189000, or
|
|
698
|
+
// pointer is greater than 1237575, return null.
|
|
699
|
+
if ((pointer > 39419 && pointer < 189000) || pointer > 1237575)
|
|
700
|
+
return null;
|
|
701
|
+
// 2. If pointer is 7457, return code point U+E7C7.
|
|
702
|
+
if (pointer === 7457)
|
|
703
|
+
return 0xe7c7;
|
|
704
|
+
// 3. Let offset be the last pointer in index gb18030 ranges that
|
|
705
|
+
// is equal to or less than pointer and let code point offset be
|
|
706
|
+
// its corresponding code point.
|
|
707
|
+
var offset = 0;
|
|
708
|
+
var code_point_offset = 0;
|
|
709
|
+
var idx = index('gb18030-ranges');
|
|
710
|
+
var i;
|
|
711
|
+
for (i = 0; i < idx.length; ++i) {
|
|
712
|
+
/** @type {!Array.<number>} */
|
|
713
|
+
var entry = idx[i];
|
|
714
|
+
if (entry[0] <= pointer) {
|
|
715
|
+
offset = entry[0];
|
|
716
|
+
code_point_offset = entry[1];
|
|
717
|
+
}
|
|
718
|
+
else {
|
|
719
|
+
break;
|
|
720
|
+
}
|
|
332
721
|
}
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
722
|
+
// 4. Return a code point whose value is code point offset +
|
|
723
|
+
// pointer − offset.
|
|
724
|
+
return code_point_offset + pointer - offset;
|
|
336
725
|
}
|
|
337
|
-
|
|
726
|
+
/**
|
|
727
|
+
* @param {number} code_point The |code point| to locate in the gb18030 index.
|
|
728
|
+
* @return {number} The first pointer corresponding to |code point| in the
|
|
729
|
+
* gb18030 index.
|
|
730
|
+
*/
|
|
338
731
|
function indexGB18030RangesPointerFor(code_point) {
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
var
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
732
|
+
// 1. If code point is U+E7C7, return pointer 7457.
|
|
733
|
+
if (code_point === 0xe7c7)
|
|
734
|
+
return 7457;
|
|
735
|
+
// 2. Let offset be the last code point in index gb18030 ranges
|
|
736
|
+
// that is equal to or less than code point and let pointer offset
|
|
737
|
+
// be its corresponding pointer.
|
|
738
|
+
var offset = 0;
|
|
739
|
+
var pointer_offset = 0;
|
|
740
|
+
var idx = index('gb18030-ranges');
|
|
741
|
+
var i;
|
|
742
|
+
for (i = 0; i < idx.length; ++i) {
|
|
743
|
+
/** @type {!Array.<number>} */
|
|
744
|
+
var entry = idx[i];
|
|
745
|
+
if (entry[1] <= code_point) {
|
|
746
|
+
offset = entry[1];
|
|
747
|
+
pointer_offset = entry[0];
|
|
748
|
+
}
|
|
749
|
+
else {
|
|
750
|
+
break;
|
|
751
|
+
}
|
|
353
752
|
}
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
753
|
+
// 3. Return a pointer whose value is pointer offset + code point
|
|
754
|
+
// − offset.
|
|
755
|
+
return pointer_offset + code_point - offset;
|
|
357
756
|
}
|
|
358
|
-
|
|
757
|
+
/**
|
|
758
|
+
* @param {number} code_point The |code_point| to search for in the Shift_JIS
|
|
759
|
+
* index.
|
|
760
|
+
* @return {?number} The code point corresponding to |pointer| in |index|,
|
|
761
|
+
* or null if |code point| is not in the Shift_JIS index.
|
|
762
|
+
*/
|
|
359
763
|
function indexShiftJISPointerFor(code_point) {
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
764
|
+
// 1. Let index be index jis0208 excluding all entries whose
|
|
765
|
+
// pointer is in the range 8272 to 8835, inclusive.
|
|
766
|
+
shift_jis_index =
|
|
767
|
+
shift_jis_index ||
|
|
768
|
+
index('jis0208').map(function (code_point, pointer) {
|
|
769
|
+
return inRange(pointer, 8272, 8835) ? null : code_point;
|
|
770
|
+
});
|
|
771
|
+
var index_ = shift_jis_index;
|
|
772
|
+
// 2. Return the index pointer for code point in index.
|
|
773
|
+
return index_.indexOf(code_point);
|
|
365
774
|
}
|
|
366
|
-
|
|
367
775
|
var shift_jis_index;
|
|
368
|
-
|
|
776
|
+
/**
|
|
777
|
+
* @param {number} code_point The |code_point| to search for in the big5
|
|
778
|
+
* index.
|
|
779
|
+
* @return {?number} The code point corresponding to |pointer| in |index|,
|
|
780
|
+
* or null if |code point| is not in the big5 index.
|
|
781
|
+
*/
|
|
369
782
|
function indexBig5PointerFor(code_point) {
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
783
|
+
// 1. Let index be index Big5 excluding all entries whose pointer
|
|
784
|
+
big5_index_no_hkscs =
|
|
785
|
+
big5_index_no_hkscs ||
|
|
786
|
+
index('big5').map(function (code_point, pointer) {
|
|
787
|
+
return pointer < (0xa1 - 0x81) * 157 ? null : code_point;
|
|
788
|
+
});
|
|
789
|
+
var index_ = big5_index_no_hkscs;
|
|
790
|
+
// 2. If code point is U+2550, U+255E, U+2561, U+256A, U+5341, or
|
|
791
|
+
// U+5345, return the last pointer corresponding to code point in
|
|
792
|
+
// index.
|
|
793
|
+
if (code_point === 0x2550 ||
|
|
794
|
+
code_point === 0x255e ||
|
|
795
|
+
code_point === 0x2561 ||
|
|
796
|
+
code_point === 0x256a ||
|
|
797
|
+
code_point === 0x5341 ||
|
|
798
|
+
code_point === 0x5345) {
|
|
799
|
+
return index_.lastIndexOf(code_point);
|
|
800
|
+
}
|
|
801
|
+
// 3. Return the index pointer for code point in index.
|
|
802
|
+
return indexPointerFor(code_point, index_);
|
|
380
803
|
}
|
|
381
|
-
|
|
382
804
|
var big5_index_no_hkscs;
|
|
383
|
-
|
|
384
|
-
|
|
805
|
+
//
|
|
806
|
+
// 8. API
|
|
807
|
+
//
|
|
808
|
+
/** @const */ var DEFAULT_ENCODING = 'utf-8';
|
|
809
|
+
// 8.1 Interface TextDecoder
|
|
810
|
+
/**
|
|
811
|
+
* @constructor
|
|
812
|
+
* @param {string=} label The label of the encoding;
|
|
813
|
+
* defaults to 'utf-8'.
|
|
814
|
+
* @param {Object=} options
|
|
815
|
+
*/
|
|
385
816
|
function TextDecoder(label, options) {
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
817
|
+
// Web IDL conventions
|
|
818
|
+
if (!(this instanceof TextDecoder))
|
|
819
|
+
throw TypeError("Called as a function. Did you forget 'new'?");
|
|
820
|
+
label = label !== undefined ? String(label) : DEFAULT_ENCODING;
|
|
821
|
+
options = ToDictionary(options);
|
|
822
|
+
// A TextDecoder object has an associated encoding, decoder,
|
|
823
|
+
// stream, ignore BOM flag (initially unset), BOM seen flag
|
|
824
|
+
// (initially unset), error mode (initially replacement), and do
|
|
825
|
+
// not flush flag (initially unset).
|
|
826
|
+
/** @private */
|
|
827
|
+
this._encoding = null;
|
|
828
|
+
/** @private @type {?Decoder} */
|
|
829
|
+
this._decoder = null;
|
|
830
|
+
/** @private @type {boolean} */
|
|
831
|
+
this._ignoreBOM = false;
|
|
832
|
+
/** @private @type {boolean} */
|
|
833
|
+
this._BOMseen = false;
|
|
834
|
+
/** @private @type {string} */
|
|
835
|
+
this._error_mode = 'replacement';
|
|
836
|
+
/** @private @type {boolean} */
|
|
837
|
+
this._do_not_flush = false;
|
|
838
|
+
// 1. Let encoding be the result of getting an encoding from
|
|
839
|
+
// label.
|
|
840
|
+
var encoding = getEncoding(label);
|
|
841
|
+
// 2. If encoding is failure or replacement, throw a RangeError.
|
|
842
|
+
if (encoding === null || encoding.name === 'replacement')
|
|
843
|
+
throw RangeError('Unknown encoding: ' + label);
|
|
844
|
+
if (!decoders[encoding.name]) {
|
|
845
|
+
throw Error('Decoder not present.' + ' Did you forget to include encoding-indexes.js first?');
|
|
846
|
+
}
|
|
847
|
+
// 3. Let dec be a new TextDecoder object.
|
|
848
|
+
var dec = this;
|
|
849
|
+
// 4. Set dec's encoding to encoding.
|
|
850
|
+
dec._encoding = encoding;
|
|
851
|
+
// 5. If options's fatal member is true, set dec's error mode to
|
|
852
|
+
// fatal.
|
|
853
|
+
if (Boolean(options['fatal']))
|
|
854
|
+
dec._error_mode = 'fatal';
|
|
855
|
+
// 6. If options's ignoreBOM member is true, set dec's ignore BOM
|
|
856
|
+
// flag.
|
|
857
|
+
if (Boolean(options['ignoreBOM']))
|
|
858
|
+
dec._ignoreBOM = true;
|
|
859
|
+
// For pre-ES5 runtimes:
|
|
860
|
+
if (!Object.defineProperty) {
|
|
861
|
+
this.encoding = dec._encoding.name.toLowerCase();
|
|
862
|
+
this.fatal = dec._error_mode === 'fatal';
|
|
863
|
+
this.ignoreBOM = dec._ignoreBOM;
|
|
864
|
+
}
|
|
865
|
+
// 7. Return dec.
|
|
866
|
+
return dec;
|
|
414
867
|
}
|
|
415
|
-
|
|
868
|
+
exports.TextDecoder = TextDecoder;
|
|
416
869
|
if (Object.defineProperty) {
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
870
|
+
// The encoding attribute's getter must return encoding's name.
|
|
871
|
+
Object.defineProperty(TextDecoder.prototype, 'encoding', {
|
|
872
|
+
/** @this {TextDecoder} */
|
|
873
|
+
get: function () {
|
|
874
|
+
return this._encoding.name.toLowerCase();
|
|
875
|
+
}
|
|
876
|
+
});
|
|
877
|
+
// The fatal attribute's getter must return true if error mode
|
|
878
|
+
// is fatal, and false otherwise.
|
|
879
|
+
Object.defineProperty(TextDecoder.prototype, 'fatal', {
|
|
880
|
+
/** @this {TextDecoder} */
|
|
881
|
+
get: function () {
|
|
882
|
+
return this._error_mode === 'fatal';
|
|
883
|
+
}
|
|
884
|
+
});
|
|
885
|
+
// The ignoreBOM attribute's getter must return true if ignore
|
|
886
|
+
// BOM flag is set, and false otherwise.
|
|
887
|
+
Object.defineProperty(TextDecoder.prototype, 'ignoreBOM', {
|
|
888
|
+
/** @this {TextDecoder} */
|
|
889
|
+
get: function () {
|
|
890
|
+
return this._ignoreBOM;
|
|
891
|
+
}
|
|
892
|
+
});
|
|
893
|
+
}
|
|
894
|
+
/**
|
|
895
|
+
* @param {BufferSource=} input The buffer of bytes to decode.
|
|
896
|
+
* @param {Object=} options
|
|
897
|
+
* @return {string} The decoded string.
|
|
898
|
+
*/
|
|
899
|
+
TextDecoder.prototype.decode = function decode(input, options) {
|
|
900
|
+
var bytes;
|
|
901
|
+
if (typeof input === 'object' && input instanceof ArrayBuffer) {
|
|
902
|
+
bytes = new Uint8Array(input);
|
|
420
903
|
}
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
904
|
+
else if (typeof input === 'object' &&
|
|
905
|
+
'buffer' in input &&
|
|
906
|
+
input.buffer instanceof ArrayBuffer) {
|
|
907
|
+
bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
|
|
425
908
|
}
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
get: function () {
|
|
429
|
-
return this._ignoreBOM;
|
|
909
|
+
else {
|
|
910
|
+
bytes = new Uint8Array(0);
|
|
430
911
|
}
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength);
|
|
441
|
-
} else {
|
|
442
|
-
bytes = new Uint8Array(0);
|
|
443
|
-
}
|
|
444
|
-
|
|
445
|
-
options = ToDictionary(options);
|
|
446
|
-
|
|
447
|
-
if (!this._do_not_flush) {
|
|
448
|
-
this._decoder = decoders[this._encoding.name]({
|
|
449
|
-
fatal: this._error_mode === 'fatal'
|
|
450
|
-
});
|
|
451
|
-
this._BOMseen = false;
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
this._do_not_flush = Boolean(options['stream']);
|
|
455
|
-
var input_stream = new Stream(bytes);
|
|
456
|
-
var output = [];
|
|
457
|
-
var result;
|
|
458
|
-
|
|
459
|
-
while (true) {
|
|
460
|
-
var token = input_stream.read();
|
|
461
|
-
if (token === end_of_stream) break;
|
|
462
|
-
result = this._decoder.handler(input_stream, token);
|
|
463
|
-
if (result === finished) break;
|
|
464
|
-
|
|
465
|
-
if (result !== null) {
|
|
466
|
-
if (Array.isArray(result)) output.push.apply(output, result);else output.push(result);
|
|
912
|
+
options = ToDictionary(options);
|
|
913
|
+
// 1. If the do not flush flag is unset, set decoder to a new
|
|
914
|
+
// encoding's decoder, set stream to a new stream, and unset the
|
|
915
|
+
// BOM seen flag.
|
|
916
|
+
if (!this._do_not_flush) {
|
|
917
|
+
this._decoder = decoders[this._encoding.name]({
|
|
918
|
+
fatal: this._error_mode === 'fatal'
|
|
919
|
+
});
|
|
920
|
+
this._BOMseen = false;
|
|
467
921
|
}
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
922
|
+
// 2. If options's stream is true, set the do not flush flag, and
|
|
923
|
+
// unset the do not flush flag otherwise.
|
|
924
|
+
this._do_not_flush = Boolean(options['stream']);
|
|
925
|
+
// 3. If input is given, push a copy of input to stream.
|
|
926
|
+
// TODO: Align with spec algorithm - maintain stream on instance.
|
|
927
|
+
var input_stream = new Stream(bytes);
|
|
928
|
+
// 4. Let output be a new stream.
|
|
929
|
+
var output = [];
|
|
930
|
+
/** @type {?(number|!Array.<number>)} */
|
|
931
|
+
var result;
|
|
932
|
+
// 5. While true:
|
|
933
|
+
while (true) {
|
|
934
|
+
// 1. Let token be the result of reading from stream.
|
|
935
|
+
var token = input_stream.read();
|
|
936
|
+
// 2. If token is end-of-stream and the do not flush flag is
|
|
937
|
+
// set, return output, serialized.
|
|
938
|
+
// TODO: Align with spec algorithm.
|
|
939
|
+
if (token === end_of_stream)
|
|
940
|
+
break;
|
|
941
|
+
// 3. Otherwise, run these subsubsteps:
|
|
942
|
+
// 1. Let result be the result of processing token for decoder,
|
|
943
|
+
// stream, output, and error mode.
|
|
944
|
+
result = this._decoder.handler(input_stream, token);
|
|
945
|
+
// 2. If result is finished, return output, serialized.
|
|
946
|
+
if (result === finished)
|
|
947
|
+
break;
|
|
948
|
+
if (result !== null) {
|
|
949
|
+
if (Array.isArray(result))
|
|
950
|
+
output.push.apply(output, /**@type {!Array.<number>}*/ result);
|
|
951
|
+
else
|
|
952
|
+
output.push(result);
|
|
953
|
+
}
|
|
954
|
+
// 3. Otherwise, if result is error, throw a TypeError.
|
|
955
|
+
// (Thrown in handler)
|
|
956
|
+
// 4. Otherwise, do nothing.
|
|
489
957
|
}
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
958
|
+
// TODO: Align with spec algorithm.
|
|
959
|
+
if (!this._do_not_flush) {
|
|
960
|
+
do {
|
|
961
|
+
result = this._decoder.handler(input_stream, input_stream.read());
|
|
962
|
+
if (result === finished)
|
|
963
|
+
break;
|
|
964
|
+
if (result === null)
|
|
965
|
+
continue;
|
|
966
|
+
if (Array.isArray(result))
|
|
967
|
+
output.push.apply(output, /**@type {!Array.<number>}*/ result);
|
|
968
|
+
else
|
|
969
|
+
output.push(result);
|
|
970
|
+
} while (!input_stream.endOfStream());
|
|
971
|
+
this._decoder = null;
|
|
972
|
+
}
|
|
973
|
+
// A TextDecoder object also has an associated serialize stream
|
|
974
|
+
// algorithm...
|
|
975
|
+
/**
|
|
976
|
+
* @param {!Array.<number>} stream
|
|
977
|
+
* @return {string}
|
|
978
|
+
* @this {TextDecoder}
|
|
979
|
+
*/
|
|
980
|
+
function serializeStream(stream) {
|
|
981
|
+
// 1. Let token be the result of reading from stream.
|
|
982
|
+
// (Done in-place on array, rather than as a stream)
|
|
983
|
+
// 2. If encoding is UTF-8, UTF-16BE, or UTF-16LE, and ignore
|
|
984
|
+
// BOM flag and BOM seen flag are unset, run these subsubsteps:
|
|
985
|
+
if (includes(['UTF-8', 'UTF-16LE', 'UTF-16BE'], this._encoding.name) &&
|
|
986
|
+
!this._ignoreBOM &&
|
|
987
|
+
!this._BOMseen) {
|
|
988
|
+
if (stream.length > 0 && stream[0] === 0xfeff) {
|
|
989
|
+
// 1. If token is U+FEFF, set BOM seen flag.
|
|
990
|
+
this._BOMseen = true;
|
|
991
|
+
stream.shift();
|
|
992
|
+
}
|
|
993
|
+
else if (stream.length > 0) {
|
|
994
|
+
// 2. Otherwise, if token is not end-of-stream, set BOM seen
|
|
995
|
+
// flag and append token to stream.
|
|
996
|
+
this._BOMseen = true;
|
|
997
|
+
}
|
|
998
|
+
else {
|
|
999
|
+
// 3. Otherwise, if token is not end-of-stream, append token
|
|
1000
|
+
// to output.
|
|
1001
|
+
// (no-op)
|
|
1002
|
+
}
|
|
1003
|
+
}
|
|
1004
|
+
// 4. Otherwise, return output.
|
|
1005
|
+
return codePointsToString(stream);
|
|
1006
|
+
}
|
|
1007
|
+
return serializeStream.call(this, output);
|
|
495
1008
|
};
|
|
496
|
-
|
|
1009
|
+
// 8.2 Interface TextEncoder
|
|
1010
|
+
/**
|
|
1011
|
+
* @constructor
|
|
1012
|
+
* @param {string=} label The label of the encoding. NONSTANDARD.
|
|
1013
|
+
* @param {Object=} options NONSTANDARD.
|
|
1014
|
+
*/
|
|
497
1015
|
function TextEncoder(label, options) {
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
1016
|
+
// Web IDL conventions
|
|
1017
|
+
if (!(this instanceof TextEncoder))
|
|
1018
|
+
throw TypeError("Called as a function. Did you forget 'new'?");
|
|
1019
|
+
options = ToDictionary(options);
|
|
1020
|
+
// A TextEncoder object has an associated encoding and encoder.
|
|
1021
|
+
/** @private */
|
|
1022
|
+
this._encoding = null;
|
|
1023
|
+
/** @private @type {?Encoder} */
|
|
1024
|
+
this._encoder = null;
|
|
1025
|
+
// Non-standard
|
|
1026
|
+
/** @private @type {boolean} */
|
|
1027
|
+
this._do_not_flush = false;
|
|
1028
|
+
/** @private @type {string} */
|
|
1029
|
+
this._fatal = Boolean(options['fatal']) ? 'fatal' : 'replacement';
|
|
1030
|
+
// 1. Let enc be a new TextEncoder object.
|
|
1031
|
+
var enc = this;
|
|
1032
|
+
// 2. Set enc's encoding to UTF-8's encoder.
|
|
1033
|
+
if (Boolean(options['NONSTANDARD_allowLegacyEncoding'])) {
|
|
1034
|
+
// NONSTANDARD behavior.
|
|
1035
|
+
label = label !== undefined ? String(label) : DEFAULT_ENCODING;
|
|
1036
|
+
var encoding = getEncoding(label);
|
|
1037
|
+
if (encoding === null || encoding.name === 'replacement')
|
|
1038
|
+
throw RangeError('Unknown encoding: ' + label);
|
|
1039
|
+
if (!encoders[encoding.name]) {
|
|
1040
|
+
throw Error('Encoder not present.' + ' Did you forget to include encoding-indexes.js first?');
|
|
1041
|
+
}
|
|
1042
|
+
enc._encoding = encoding;
|
|
513
1043
|
}
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
console.warn('TextEncoder constructor called with encoding label, ' + 'which is ignored.');
|
|
1044
|
+
else {
|
|
1045
|
+
// Standard behavior.
|
|
1046
|
+
enc._encoding = getEncoding('utf-8');
|
|
1047
|
+
if (label !== undefined && 'console' in global) {
|
|
1048
|
+
console.warn('TextEncoder constructor called with encoding label, ' + 'which is ignored.');
|
|
1049
|
+
}
|
|
521
1050
|
}
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
1051
|
+
// For pre-ES5 runtimes:
|
|
1052
|
+
if (!Object.defineProperty)
|
|
1053
|
+
this.encoding = enc._encoding.name.toLowerCase();
|
|
1054
|
+
// 3. Return enc.
|
|
1055
|
+
return enc;
|
|
526
1056
|
}
|
|
527
|
-
|
|
1057
|
+
exports.TextEncoder = TextEncoder;
|
|
528
1058
|
if (Object.defineProperty) {
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
1059
|
+
// The encoding attribute's getter must return encoding's name.
|
|
1060
|
+
Object.defineProperty(TextEncoder.prototype, 'encoding', {
|
|
1061
|
+
/** @this {TextEncoder} */
|
|
1062
|
+
get: function () {
|
|
1063
|
+
return this._encoding.name.toLowerCase();
|
|
1064
|
+
}
|
|
1065
|
+
});
|
|
534
1066
|
}
|
|
535
|
-
|
|
1067
|
+
/**
|
|
1068
|
+
* @param {string=} opt_string The string to encode.
|
|
1069
|
+
* @param {Object=} options
|
|
1070
|
+
* @return {!Uint8Array} Encoded bytes, as a Uint8Array.
|
|
1071
|
+
*/
|
|
536
1072
|
TextEncoder.prototype.encode = function encode(opt_string, options) {
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
var
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
if (!this._do_not_flush) {
|
|
1073
|
+
opt_string = opt_string === undefined ? '' : String(opt_string);
|
|
1074
|
+
options = ToDictionary(options);
|
|
1075
|
+
// NOTE: This option is nonstandard. None of the encodings
|
|
1076
|
+
// permitted for encoding (i.e. UTF-8, UTF-16) are stateful when
|
|
1077
|
+
// the input is a USVString so streaming is not necessary.
|
|
1078
|
+
if (!this._do_not_flush)
|
|
1079
|
+
this._encoder = encoders[this._encoding.name]({
|
|
1080
|
+
fatal: this._fatal === 'fatal'
|
|
1081
|
+
});
|
|
1082
|
+
this._do_not_flush = Boolean(options['stream']);
|
|
1083
|
+
// 1. Convert input to a stream.
|
|
1084
|
+
var input = new Stream(stringToCodePoints(opt_string));
|
|
1085
|
+
// 2. Let output be a new stream
|
|
1086
|
+
var output = [];
|
|
1087
|
+
/** @type {?(number|!Array.<number>)} */
|
|
1088
|
+
var result;
|
|
1089
|
+
// 3. While true, run these substeps:
|
|
556
1090
|
while (true) {
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
1091
|
+
// 1. Let token be the result of reading from input.
|
|
1092
|
+
var token = input.read();
|
|
1093
|
+
if (token === end_of_stream)
|
|
1094
|
+
break;
|
|
1095
|
+
// 2. Let result be the result of processing token for encoder,
|
|
1096
|
+
// input, output.
|
|
1097
|
+
result = this._encoder.handler(input, token);
|
|
1098
|
+
if (result === finished)
|
|
1099
|
+
break;
|
|
1100
|
+
if (Array.isArray(result))
|
|
1101
|
+
output.push.apply(output, /**@type {!Array.<number>}*/ result);
|
|
1102
|
+
else
|
|
1103
|
+
output.push(result);
|
|
560
1104
|
}
|
|
561
|
-
|
|
562
|
-
this.
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
1105
|
+
// TODO: Align with spec algorithm.
|
|
1106
|
+
if (!this._do_not_flush) {
|
|
1107
|
+
while (true) {
|
|
1108
|
+
result = this._encoder.handler(input, input.read());
|
|
1109
|
+
if (result === finished)
|
|
1110
|
+
break;
|
|
1111
|
+
if (Array.isArray(result))
|
|
1112
|
+
output.push.apply(output, /**@type {!Array.<number>}*/ result);
|
|
1113
|
+
else
|
|
1114
|
+
output.push(result);
|
|
1115
|
+
}
|
|
1116
|
+
this._encoder = null;
|
|
1117
|
+
}
|
|
1118
|
+
// 3. If result is finished, convert output into a byte sequence,
|
|
1119
|
+
// and then return a Uint8Array object wrapping an ArrayBuffer
|
|
1120
|
+
// containing output.
|
|
1121
|
+
return new Uint8Array(output);
|
|
566
1122
|
};
|
|
567
|
-
|
|
1123
|
+
//
|
|
1124
|
+
// 9. The encoding
|
|
1125
|
+
//
|
|
1126
|
+
// 9.1 utf-8
|
|
1127
|
+
// 9.1.1 utf-8 decoder
|
|
1128
|
+
/**
|
|
1129
|
+
* @constructor
|
|
1130
|
+
* @implements {Decoder}
|
|
1131
|
+
* @param {{fatal: boolean}} options
|
|
1132
|
+
*/
|
|
568
1133
|
function UTF8Decoder(options) {
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
1134
|
+
var fatal = options.fatal;
|
|
1135
|
+
// utf-8's decoder's has an associated utf-8 code point, utf-8
|
|
1136
|
+
// bytes seen, and utf-8 bytes needed (all initially 0), a utf-8
|
|
1137
|
+
// lower boundary (initially 0x80), and a utf-8 upper boundary
|
|
1138
|
+
// (initially 0xBF).
|
|
1139
|
+
var /** @type {number} */ utf8_code_point = 0,
|
|
1140
|
+
/** @type {number} */ utf8_bytes_seen = 0,
|
|
1141
|
+
/** @type {number} */ utf8_bytes_needed = 0,
|
|
1142
|
+
/** @type {number} */ utf8_lower_boundary = 0x80,
|
|
1143
|
+
/** @type {number} */ utf8_upper_boundary = 0xbf;
|
|
1144
|
+
/**
|
|
1145
|
+
* @param {Stream} stream The stream of bytes being decoded.
|
|
1146
|
+
* @param {number} bite The next byte read from the stream.
|
|
1147
|
+
* @return {?(number|!Array.<number>)} The next code point(s)
|
|
1148
|
+
* decoded, or null if not enough data exists in the input
|
|
1149
|
+
* stream to decode a complete code point.
|
|
1150
|
+
*/
|
|
1151
|
+
this.handler = function (stream, bite) {
|
|
1152
|
+
// 1. If byte is end-of-stream and utf-8 bytes needed is not 0,
|
|
1153
|
+
// set utf-8 bytes needed to 0 and return error.
|
|
1154
|
+
if (bite === end_of_stream && utf8_bytes_needed !== 0) {
|
|
1155
|
+
utf8_bytes_needed = 0;
|
|
1156
|
+
return decoderError(fatal);
|
|
1157
|
+
}
|
|
1158
|
+
// 2. If byte is end-of-stream, return finished.
|
|
1159
|
+
if (bite === end_of_stream)
|
|
1160
|
+
return finished;
|
|
1161
|
+
// 3. If utf-8 bytes needed is 0, based on byte:
|
|
1162
|
+
if (utf8_bytes_needed === 0) {
|
|
1163
|
+
// 0x00 to 0x7F
|
|
1164
|
+
if (inRange(bite, 0x00, 0x7f)) {
|
|
1165
|
+
// Return a code point whose value is byte.
|
|
1166
|
+
return bite;
|
|
1167
|
+
}
|
|
1168
|
+
// 0xC2 to 0xDF
|
|
1169
|
+
else if (inRange(bite, 0xc2, 0xdf)) {
|
|
1170
|
+
// 1. Set utf-8 bytes needed to 1.
|
|
1171
|
+
utf8_bytes_needed = 1;
|
|
1172
|
+
// 2. Set UTF-8 code point to byte & 0x1F.
|
|
1173
|
+
utf8_code_point = bite & 0x1f;
|
|
1174
|
+
}
|
|
1175
|
+
// 0xE0 to 0xEF
|
|
1176
|
+
else if (inRange(bite, 0xe0, 0xef)) {
|
|
1177
|
+
// 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0.
|
|
1178
|
+
if (bite === 0xe0)
|
|
1179
|
+
utf8_lower_boundary = 0xa0;
|
|
1180
|
+
// 2. If byte is 0xED, set utf-8 upper boundary to 0x9F.
|
|
1181
|
+
if (bite === 0xed)
|
|
1182
|
+
utf8_upper_boundary = 0x9f;
|
|
1183
|
+
// 3. Set utf-8 bytes needed to 2.
|
|
1184
|
+
utf8_bytes_needed = 2;
|
|
1185
|
+
// 4. Set UTF-8 code point to byte & 0xF.
|
|
1186
|
+
utf8_code_point = bite & 0xf;
|
|
1187
|
+
}
|
|
1188
|
+
// 0xF0 to 0xF4
|
|
1189
|
+
else if (inRange(bite, 0xf0, 0xf4)) {
|
|
1190
|
+
// 1. If byte is 0xF0, set utf-8 lower boundary to 0x90.
|
|
1191
|
+
if (bite === 0xf0)
|
|
1192
|
+
utf8_lower_boundary = 0x90;
|
|
1193
|
+
// 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F.
|
|
1194
|
+
if (bite === 0xf4)
|
|
1195
|
+
utf8_upper_boundary = 0x8f;
|
|
1196
|
+
// 3. Set utf-8 bytes needed to 3.
|
|
1197
|
+
utf8_bytes_needed = 3;
|
|
1198
|
+
// 4. Set UTF-8 code point to byte & 0x7.
|
|
1199
|
+
utf8_code_point = bite & 0x7;
|
|
1200
|
+
}
|
|
1201
|
+
// Otherwise
|
|
1202
|
+
else {
|
|
1203
|
+
// Return error.
|
|
601
1204
|
return decoderError(fatal);
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
1205
|
+
}
|
|
1206
|
+
// Return continue.
|
|
1207
|
+
return null;
|
|
1208
|
+
}
|
|
1209
|
+
// 4. If byte is not in the range utf-8 lower boundary to utf-8
|
|
1210
|
+
// upper boundary, inclusive, run these substeps:
|
|
1211
|
+
if (!inRange(bite, utf8_lower_boundary, utf8_upper_boundary)) {
|
|
1212
|
+
// 1. Set utf-8 code point, utf-8 bytes needed, and utf-8
|
|
1213
|
+
// bytes seen to 0, set utf-8 lower boundary to 0x80, and set
|
|
1214
|
+
// utf-8 upper boundary to 0xBF.
|
|
1215
|
+
utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0;
|
|
1216
|
+
utf8_lower_boundary = 0x80;
|
|
1217
|
+
utf8_upper_boundary = 0xbf;
|
|
1218
|
+
// 2. Prepend byte to stream.
|
|
1219
|
+
stream.prepend(bite);
|
|
1220
|
+
// 3. Return error.
|
|
1221
|
+
return decoderError(fatal);
|
|
1222
|
+
}
|
|
1223
|
+
// 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary
|
|
1224
|
+
// to 0xBF.
|
|
1225
|
+
utf8_lower_boundary = 0x80;
|
|
1226
|
+
utf8_upper_boundary = 0xbf;
|
|
1227
|
+
// 6. Set UTF-8 code point to (UTF-8 code point << 6) | (byte &
|
|
1228
|
+
// 0x3F)
|
|
1229
|
+
utf8_code_point = (utf8_code_point << 6) | (bite & 0x3f);
|
|
1230
|
+
// 7. Increase utf-8 bytes seen by one.
|
|
1231
|
+
utf8_bytes_seen += 1;
|
|
1232
|
+
// 8. If utf-8 bytes seen is not equal to utf-8 bytes needed,
|
|
1233
|
+
// continue.
|
|
1234
|
+
if (utf8_bytes_seen !== utf8_bytes_needed)
|
|
1235
|
+
return null;
|
|
1236
|
+
// 9. Let code point be utf-8 code point.
|
|
1237
|
+
var code_point = utf8_code_point;
|
|
1238
|
+
// 10. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes
|
|
1239
|
+
// seen to 0.
|
|
1240
|
+
utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0;
|
|
1241
|
+
// 11. Return a code point whose value is code point.
|
|
1242
|
+
return code_point;
|
|
1243
|
+
};
|
|
624
1244
|
}
|
|
625
|
-
|
|
1245
|
+
// 9.1.2 utf-8 encoder
|
|
1246
|
+
/**
 * Encoder for UTF-8 (section 9.1.2 of the algorithm this file follows).
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options Encoder options. `fatal` is read here but
 *     the handler below never consults it.
 */
function UTF8Encoder(options) {
  var fatal = options.fatal;
  /**
   * Turns a single code point into its UTF-8 byte(s).
   *
   * @param {Stream} stream Input stream (not used by this handler).
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} `finished` at end-of-stream, the code
   *     point itself when it is ASCII, otherwise the list of encoded bytes.
   */
  this.handler = function (stream, code_point) {
    // End of input: nothing left to emit.
    if (code_point === end_of_stream) {
      return finished;
    }
    // ASCII code points are emitted as a single identical byte.
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }

    // `trailing` is the number of continuation bytes; `lead_marker` holds the
    // high marker bits added to the first byte of the sequence.
    var trailing, lead_marker;
    if (inRange(code_point, 0x0080, 0x07ff)) {
      // U+0080..U+07FF: two bytes in total.
      trailing = 1;
      lead_marker = 0xc0;
    } else if (inRange(code_point, 0x0800, 0xffff)) {
      // U+0800..U+FFFF: three bytes in total.
      trailing = 2;
      lead_marker = 0xe0;
    } else if (inRange(code_point, 0x10000, 0x10ffff)) {
      // U+10000..U+10FFFF: four bytes in total.
      trailing = 3;
      lead_marker = 0xf0;
    }

    // Lead byte carries the topmost payload bits plus the marker.
    var out = [(code_point >> (6 * trailing)) + lead_marker];
    // Each continuation byte carries the next six payload bits, high to low.
    for (var shift = 6 * (trailing - 1); shift >= 0; shift -= 6) {
      out.push(0x80 | ((code_point >> shift) & 0x3f));
    }
    return out;
  };
}
|
|
656
|
-
|
|
1302
|
+
/**
 * Factory stored under the 'UTF-8' key of the encoder table.
 * @param {{fatal: boolean}} options
 */
encoders['UTF-8'] = function (options) {
  var encoder = new UTF8Encoder(options);
  return encoder;
};
/**
 * Factory stored under the 'UTF-8' key of the decoder table.
 * @param {{fatal: boolean}} options
 */
decoders['UTF-8'] = function (options) {
  var decoder = new UTF8Decoder(options);
  return decoder;
};
|
|
664
|
-
|
|
1310
|
+
//
|
|
1311
|
+
// 10. Legacy single-byte encodings
|
|
1312
|
+
//
|
|
1313
|
+
// 10.1 single-byte decoder
|
|
1314
|
+
/**
 * Decoder shared by the legacy single-byte encodings (section 10.1).
 *
 * @constructor
 * @implements {Decoder}
 * @param {!Array.<number>} index The encoding index, looked up with
 *     `byte - 0x80`.
 * @param {{fatal: boolean}} options
 */
function SingleByteDecoder(index, options) {
  var fatal = options.fatal;
  /**
   * Decodes one byte.
   *
   * @param {Stream} stream The stream of bytes being decoded (unused here).
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} `finished` at end-of-stream, the byte
   *     itself when it is ASCII, the index entry for non-ASCII bytes, or the
   *     result of `decoderError(fatal)` when that entry is null.
   */
  this.handler = function (stream, bite) {
    if (bite === end_of_stream) {
      return finished;
    }
    if (isASCIIByte(bite)) {
      return bite;
    }
    // Non-ASCII bytes map through the index, which starts at byte 0x80.
    var mapped = index[bite - 0x80];
    return mapped === null ? decoderError(fatal) : mapped;
  };
}
|
|
676
|
-
|
|
1347
|
+
// 10.2 single-byte encoder
|
|
1348
|
+
/**
 * Encoder shared by the legacy single-byte encodings (section 10.2).
 *
 * @constructor
 * @implements {Encoder}
 * @param {!Array.<?number>} index The encoding index.
 * @param {{fatal: boolean}} options Encoder options. `fatal` is read here but
 *     the handler below never consults it.
 */
function SingleByteEncoder(index, options) {
  var fatal = options.fatal;
  /**
   * Encodes one code point.
   *
   * @param {Stream} stream Input stream (unused here).
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} `finished` at end-of-stream, the code
   *     point itself when it is ASCII, `pointer + 0x80` when the index
   *     contains the code point, otherwise the result of
   *     `encoderError(code_point)`.
   */
  this.handler = function (stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream) {
      return finished;
    }
    // 2. If code point is an ASCII code point, return a byte whose
    // value is code point.
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }
    // 3. Let pointer be the index pointer for code point in index
    // single-byte.
    var pointer = indexPointerFor(code_point, index);
    // 4. If pointer is null, return error with code point.
    // The explicit `return` matters: without it a non-throwing
    // encoderError would fall through and emit `null + 0x80` (byte 0x80).
    if (pointer === null) {
      return encoderError(code_point);
    }
    // 5. Return a byte whose value is pointer + 0x80.
    return pointer + 0x80;
  };
}
|
|
688
|
-
|
|
689
1379
|
// Registers a decoder and an encoder factory for every encoding listed under
// the 'Legacy single-byte encodings' heading. Does nothing when the
// 'encoding-indexes' property is absent from `global`.
(function () {
  var indexes_present = 'encoding-indexes' in global;
  if (!indexes_present) {
    return;
  }
  encodings.forEach(function (category) {
    if (category.heading === 'Legacy single-byte encodings') {
      category.encodings.forEach(function (entry) {
        var label = entry.name;
        // Index tables are looked up by the lower-cased encoding name.
        var table = index(label.toLowerCase());
        /** @param {{fatal: boolean}} options */
        decoders[label] = function (options) {
          return new SingleByteDecoder(table, options);
        };
        /** @param {{fatal: boolean}} options */
        encoders[label] = function (options) {
          return new SingleByteEncoder(table, options);
        };
      });
    }
  });
})();
|
|
707
|
-
|
|
1399
|
+
//
|
|
1400
|
+
// 11. Legacy multi-byte Chinese (simplified) encodings
|
|
1401
|
+
//
|
|
1402
|
+
// 11.1 gbk
|
|
1403
|
+
// 11.1.1 gbk decoder
|
|
1404
|
+
// gbk's decoder is gb18030's decoder.
|
|
1405
|
+
/**
 * GBK decoding is delegated to the gb18030 decoder.
 * @param {{fatal: boolean}} options
 */
decoders['GBK'] = function (options) {
  var decoder = new GB18030Decoder(options);
  return decoder;
};
// 11.1.2 gbk encoder
/**
 * GBK encoding uses the gb18030 encoder constructed with its gbk flag set.
 * @param {{fatal: boolean}} options
 */
encoders['GBK'] = function (options) {
  var gbk_flag = true;
  return new GB18030Encoder(options, gbk_flag);
};
|
|
715
|
-
|
|
1415
|
+
// 11.2 gb18030
|
|
1416
|
+
// 11.2.1 gb18030 decoder
|
|
1417
|
+
/**
 * Decoder for gb18030 (section 11.2.1).
 *
 * Sequences are one byte (ASCII, or 0x80 for U+20AC), two bytes, or four
 * bytes long; the bytes of a partially read sequence are kept in closure
 * state between handler calls.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options `fatal` is forwarded to every
 *     `decoderError` call.
 */
function GB18030Decoder(options) {
  var fatal = options.fatal;
  // gb18030's decoder has an associated gb18030 first, gb18030
  // second, and gb18030 third (all initially 0x00).
  var /** @type {number} */ gb18030_first = 0x00,
    /** @type {number} */ gb18030_second = 0x00,
    /** @type {number} */ gb18030_third = 0x00;
  /**
   * @param {Stream} stream The stream of bytes being decoded. Bytes that
   *     must be re-read after an invalid sequence are pushed back with
   *     `stream.prepend`.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function (stream, bite) {
    // 1. If byte is end-of-stream and gb18030 first, gb18030
    // second, and gb18030 third are 0x00, return finished.
    if (
      bite === end_of_stream &&
      gb18030_first === 0x00 &&
      gb18030_second === 0x00 &&
      gb18030_third === 0x00
    ) {
      return finished;
    }
    // 2. If byte is end-of-stream, and gb18030 first, gb18030
    // second, or gb18030 third is not 0x00, set gb18030 first,
    // gb18030 second, and gb18030 third to 0x00, and return error.
    if (
      bite === end_of_stream &&
      (gb18030_first !== 0x00 || gb18030_second !== 0x00 || gb18030_third !== 0x00)
    ) {
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      gb18030_third = 0x00;
      // Return the error here instead of discarding it and relying on the
      // end-of-stream marker falling through to the final error return.
      return decoderError(fatal);
    }
    var code_point;
    // 3. If gb18030 third is not 0x00, run these substeps:
    if (gb18030_third !== 0x00) {
      // 1. Let code point be null.
      code_point = null;
      // 2. If byte is in the range 0x30 to 0x39, inclusive, set
      // code point to the index gb18030 ranges code point for
      // (((gb18030 first − 0x81) × 10 + gb18030 second − 0x30) ×
      // 126 + gb18030 third − 0x81) × 10 + byte − 0x30.
      if (inRange(bite, 0x30, 0x39)) {
        code_point = indexGB18030RangesCodePointFor(
          (((gb18030_first - 0x81) * 10 + gb18030_second - 0x30) * 126 + gb18030_third - 0x81) *
            10 +
            bite -
            0x30
        );
      }
      // 3. Let buffer be a byte sequence consisting of gb18030
      // second, gb18030 third, and byte, in order.
      var buffer = [gb18030_second, gb18030_third, bite];
      // 4. Set gb18030 first, gb18030 second, and gb18030 third to
      // 0x00.
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      gb18030_third = 0x00;
      // 5. If code point is null, prepend buffer to stream and
      // return error.
      if (code_point === null) {
        stream.prepend(buffer);
        return decoderError(fatal);
      }
      // 6. Return a code point whose value is code point.
      return code_point;
    }
    // 4. If gb18030 second is not 0x00, run these substeps:
    if (gb18030_second !== 0x00) {
      // 1. If byte is in the range 0x81 to 0xFE, inclusive, set
      // gb18030 third to byte and return continue.
      if (inRange(bite, 0x81, 0xfe)) {
        gb18030_third = bite;
        return null;
      }
      // 2. Prepend gb18030 second followed by byte to stream, set
      // gb18030 first and gb18030 second to 0x00, and return error.
      stream.prepend([gb18030_second, bite]);
      gb18030_first = 0x00;
      gb18030_second = 0x00;
      return decoderError(fatal);
    }
    // 5. If gb18030 first is not 0x00, run these substeps:
    if (gb18030_first !== 0x00) {
      // 1. If byte is in the range 0x30 to 0x39, inclusive, set
      // gb18030 second to byte and return continue.
      if (inRange(bite, 0x30, 0x39)) {
        gb18030_second = bite;
        return null;
      }
      // 2. Let lead be gb18030 first, let pointer be null, and set
      // gb18030 first to 0x00.
      var lead = gb18030_first;
      var pointer = null;
      gb18030_first = 0x00;
      // 3. Let offset be 0x40 if byte is less than 0x7F and 0x41
      // otherwise.
      var offset = bite < 0x7f ? 0x40 : 0x41;
      // 4. If byte is in the range 0x40 to 0x7E, inclusive, or 0x80
      // to 0xFE, inclusive, set pointer to (lead − 0x81) × 190 +
      // (byte − offset).
      if (inRange(bite, 0x40, 0x7e) || inRange(bite, 0x80, 0xfe)) {
        pointer = (lead - 0x81) * 190 + (bite - offset);
      }
      // 5. Let code point be null if pointer is null and the index
      // code point for pointer in index gb18030 otherwise.
      code_point = pointer === null ? null : indexCodePointFor(pointer, index('gb18030'));
      // 6. If code point is null and byte is an ASCII byte, prepend
      // byte to stream.
      if (code_point === null && isASCIIByte(bite)) {
        stream.prepend(bite);
      }
      // 7. If code point is null, return error.
      if (code_point === null) {
        return decoderError(fatal);
      }
      // 8. Return a code point whose value is code point.
      return code_point;
    }
    // 6. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite)) {
      return bite;
    }
    // 7. If byte is 0x80, return code point U+20AC.
    if (bite === 0x80) {
      return 0x20ac;
    }
    // 8. If byte is in the range 0x81 to 0xFE, inclusive, set
    // gb18030 first to byte and return continue.
    if (inRange(bite, 0x81, 0xfe)) {
      gb18030_first = bite;
      return null;
    }
    // 9. Return error.
    return decoderError(fatal);
  };
}
|
|
796
|
-
|
|
1553
|
+
// 11.2.2 gb18030 encoder
|
|
1554
|
+
/**
 * Encoder for gb18030 (section 11.2.2). When `gbk_flag` is truthy the same
 * handler applies the GBK restrictions shown below.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options Encoder options. `fatal` is read here but
 *     the handler below never consults it.
 * @param {boolean=} gbk_flag The encoder's gbk flag (unset when omitted).
 */
function GB18030Encoder(options, gbk_flag) {
  var fatal = options.fatal;
  /**
   * Encodes one code point.
   *
   * @param {Stream} stream Input stream (unused here).
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function (stream, code_point) {
    if (code_point === end_of_stream) {
      return finished;
    }
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }
    // U+E5E5 is always reported as an encoding error.
    if (code_point === 0xe5e5) {
      return encoderError(code_point);
    }
    // With the gbk flag set, U+20AC is the single byte 0x80.
    if (gbk_flag && code_point === 0x20ac) {
      return 0x80;
    }

    // Two-byte form: the code point is present in index gb18030.
    var table_pointer = indexPointerFor(code_point, index('gb18030'));
    if (table_pointer !== null) {
      var lead_byte = floor(table_pointer / 190) + 0x81;
      var trail = table_pointer % 190;
      var trail_offset;
      if (trail < 0x3f) {
        trail_offset = 0x40;
      } else {
        trail_offset = 0x41;
      }
      return [lead_byte, trail + trail_offset];
    }

    // With the gbk flag set there is no four-byte form.
    if (gbk_flag) {
      return encoderError(code_point);
    }

    // Four-byte form: split the ranges pointer into digits whose bases are,
    // from most to least significant, (unbounded), 10, 126 and 10.
    var rest = indexGB18030RangesPointerFor(code_point);
    var digit1 = floor(rest / 10 / 126 / 10);
    rest -= digit1 * 10 * 126 * 10;
    var digit2 = floor(rest / 10 / 126);
    rest -= digit2 * 10 * 126;
    var digit3 = floor(rest / 10);
    var digit4 = rest - digit3 * 10;
    return [digit1 + 0x81, digit2 + 0x30, digit3 + 0x81, digit4 + 0x30];
  };
}
|
|
825
|
-
|
|
1620
|
+
/**
 * Factory stored under the 'gb18030' key of the encoder table; the gbk flag
 * is left unset.
 * @param {{fatal: boolean}} options
 */
encoders['gb18030'] = function (options) {
  var encoder = new GB18030Encoder(options);
  return encoder;
};
/**
 * Factory stored under the 'gb18030' key of the decoder table.
 * @param {{fatal: boolean}} options
 */
decoders['gb18030'] = function (options) {
  var decoder = new GB18030Decoder(options);
  return decoder;
};
|
|
833
|
-
|
|
1628
|
+
//
|
|
1629
|
+
// 12. Legacy multi-byte Chinese (traditional) encodings
|
|
1630
|
+
//
|
|
1631
|
+
// 12.1 Big5
|
|
1632
|
+
// 12.1.1 Big5 decoder
|
|
1633
|
+
/**
 * Decoder for Big5 (section 12.1.1).
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options `fatal` is forwarded to every
 *     `decoderError` call.
 */
function Big5Decoder(options) {
  var fatal = options.fatal;
  // Lead byte of a two-byte sequence awaiting its trail byte; 0x00 = none.
  var /** @type {number} */ pending_lead = 0x00;
  /**
   * Decodes one byte.
   *
   * @param {Stream} stream The stream of bytes being decoded. An ASCII byte
   *     that fails to complete a sequence is pushed back with
   *     `stream.prepend`.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s) decoded, or
   *     null when more input is needed to complete a code point.
   */
  this.handler = function (stream, bite) {
    // End of input: finished when idle, an error when a lead is dangling.
    if (bite === end_of_stream) {
      if (pending_lead === 0x00) {
        return finished;
      }
      pending_lead = 0x00;
      return decoderError(fatal);
    }

    if (pending_lead !== 0x00) {
      var lead = pending_lead;
      pending_lead = 0x00;

      // Only trail bytes in 0x40..0x7E or 0xA1..0xFE produce a pointer.
      var pointer = null;
      if (inRange(bite, 0x40, 0x7e) || inRange(bite, 0xa1, 0xfe)) {
        var trail_offset = bite < 0x7f ? 0x40 : 0x62;
        pointer = (lead - 0x81) * 157 + (bite - trail_offset);
      }

      // Four pointers decode to a pair of code points:
      //   1133 -> U+00CA U+0304    1135 -> U+00CA U+030C
      //   1164 -> U+00EA U+0304    1166 -> U+00EA U+030C
      if (pointer === 1133) {
        return [0x00ca, 0x0304];
      }
      if (pointer === 1135) {
        return [0x00ca, 0x030c];
      }
      if (pointer === 1164) {
        return [0x00ea, 0x0304];
      }
      if (pointer === 1166) {
        return [0x00ea, 0x030c];
      }

      var code_point = null;
      if (pointer !== null) {
        code_point = indexCodePointFor(pointer, index('big5'));
      }
      if (code_point === null) {
        // Give an ASCII trail byte back to the stream before reporting.
        if (isASCIIByte(bite)) {
          stream.prepend(bite);
        }
        return decoderError(fatal);
      }
      return code_point;
    }

    if (isASCIIByte(bite)) {
      return bite;
    }
    // 0x81..0xFE starts a two-byte sequence.
    if (inRange(bite, 0x81, 0xfe)) {
      pending_lead = bite;
      return null;
    }
    return decoderError(fatal);
  };
}
|
|
883
|
-
|
|
1721
|
+
// 12.1.2 Big5 encoder
|
|
1722
|
+
/**
 * Encoder for Big5 (section 12.1.2).
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options Encoder options. `fatal` is read here but
 *     the handler below never consults it.
 */
function Big5Encoder(options) {
  var fatal = options.fatal;
  /**
   * Encodes one code point.
   *
   * @param {Stream} stream Input stream (unused here).
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} `finished`, a single ASCII byte, a
   *     `[lead, trail]` pair, or the result of `encoderError(code_point)`.
   */
  this.handler = function (stream, code_point) {
    if (code_point === end_of_stream) {
      return finished;
    }
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }

    var big5_pointer = indexBig5PointerFor(code_point);
    if (big5_pointer === null) {
      return encoderError(code_point);
    }

    // Lead bytes below 0xA1 are rejected by the encoder.
    var lead_byte = floor(big5_pointer / 157) + 0x81;
    if (lead_byte < 0xa1) {
      return encoderError(code_point);
    }

    // Trail values below 0x3F are offset by 0x40, the rest by 0x62.
    var trail = big5_pointer % 157;
    var trail_byte = trail < 0x3f ? trail + 0x40 : trail + 0x62;
    return [lead_byte, trail_byte];
  };
}
|
|
899
|
-
|
|
1761
|
+
/**
 * Factory stored under the 'Big5' key of the encoder table.
 * @param {{fatal: boolean}} options
 */
encoders['Big5'] = function (options) {
  var encoder = new Big5Encoder(options);
  return encoder;
};
/**
 * Factory stored under the 'Big5' key of the decoder table.
 * @param {{fatal: boolean}} options
 */
decoders['Big5'] = function (options) {
  var decoder = new Big5Decoder(options);
  return decoder;
};
|
|
907
|
-
|
|
1769
|
+
//
|
|
1770
|
+
// 13. Legacy multi-byte Japanese encodings
|
|
1771
|
+
//
|
|
1772
|
+
// 13.1 euc-jp
|
|
1773
|
+
// 13.1.1 euc-jp decoder
|
|
1774
|
+
/**
 * Decoder for euc-jp (section 13.1.1).
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options `fatal` is forwarded to every
 *     `decoderError` call.
 */
function EUCJPDecoder(options) {
  var fatal = options.fatal;
  // Set while decoding a 0x8F-prefixed sequence, which selects index
  // jis0212 instead of jis0208.
  var /** @type {boolean} */ use_jis0212 = false,
    // Lead byte awaiting its trail byte; 0x00 = none.
    /** @type {number} */ pending_lead = 0x00;
  /**
   * Decodes one byte.
   *
   * @param {Stream} stream The stream of bytes being decoded. A byte outside
   *     0xA1..0xFE that follows a pending lead is pushed back with
   *     `stream.prepend`.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s) decoded, or
   *     null when more input is needed to complete a code point.
   */
  this.handler = function (stream, bite) {
    // End of input: finished when idle, an error when a lead is dangling.
    if (bite === end_of_stream) {
      if (pending_lead === 0x00) {
        return finished;
      }
      pending_lead = 0x00;
      return decoderError(fatal);
    }

    // 0x8E followed by 0xA1..0xDF maps linearly onto U+FF61 and up.
    if (pending_lead === 0x8e && inRange(bite, 0xa1, 0xdf)) {
      pending_lead = 0x00;
      return 0xff61 - 0xa1 + bite;
    }

    // 0x8F followed by 0xA1..0xFE: this byte becomes the lead of a jis0212
    // pair and one more byte is required.
    if (pending_lead === 0x8f && inRange(bite, 0xa1, 0xfe)) {
      use_jis0212 = true;
      pending_lead = bite;
      return null;
    }

    if (pending_lead !== 0x00) {
      var lead = pending_lead;
      pending_lead = 0x00;

      var trail_ok = inRange(bite, 0xa1, 0xfe);
      var code_point = null;
      if (inRange(lead, 0xa1, 0xfe) && trail_ok) {
        var table = index(use_jis0212 ? 'jis0212' : 'jis0208');
        code_point = indexCodePointFor((lead - 0xa1) * 94 + (bite - 0xa1), table);
      }
      // The flag applies to a single pair only.
      use_jis0212 = false;

      // A byte that cannot be a trail byte is handed back for re-reading.
      if (!trail_ok) {
        stream.prepend(bite);
      }
      return code_point === null ? decoderError(fatal) : code_point;
    }

    if (isASCIIByte(bite)) {
      return bite;
    }
    // 0x8E, 0x8F and 0xA1..0xFE each start a multi-byte sequence.
    if (bite === 0x8e || bite === 0x8f || inRange(bite, 0xa1, 0xfe)) {
      pending_lead = bite;
      return null;
    }
    return decoderError(fatal);
  };
}
|
|
957
|
-
|
|
1858
|
+
// 13.1.2 euc-jp encoder
|
|
1859
|
+
/**
 * euc-jp encoder (13.1.2). Installs a `handler` that turns one code point at
 * a time into the byte, or byte pair, to emit.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function EUCJPEncoder(options) {
  // Read eagerly so a missing `options` fails at construction time; the
  // handler below does not consult this value.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function (stream, code_point) {
    // Step 1: end-of-stream finishes the encoder.
    if (code_point === end_of_stream) {
      return finished;
    }

    // Step 2: ASCII code points are emitted unchanged.
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }

    // Steps 3-4: U+00A5 and U+203E map to single bytes.
    switch (code_point) {
      case 0x00a5:
        return 0x5c;
      case 0x203e:
        return 0x7e;
    }

    // Step 5: U+FF61..U+FF9F become 0x8E followed by an offset byte.
    if (inRange(code_point, 0xff61, 0xff9f)) {
      var katakanaByte = code_point - 0xff61 + 0xa1;
      return [0x8e, katakanaByte];
    }

    // Step 6: U+2212 is looked up (and reported on error) as U+FF0D.
    var lookupPoint = code_point === 0x2212 ? 0xff0d : code_point;

    // Steps 7-8: find the pointer in index jis0208; no pointer is an error.
    var jisPointer = indexPointerFor(lookupPoint, index('jis0208'));
    if (jisPointer === null) {
      return encoderError(lookupPoint);
    }

    // Steps 9-11: split the pointer into lead and trail bytes, 94 per row.
    return [floor(jisPointer / 94) + 0xa1, (jisPointer % 94) + 0xa1];
  };
}
|
|
975
|
-
|
|
1907
|
+
/**
 * Factory stored under the 'EUC-JP' key of `encoders`.
 * @param {{fatal: boolean}} options Passed through to the constructor.
 */
encoders['EUC-JP'] = function (options) {
  var encoder = new EUCJPEncoder(options);
  return encoder;
};

/**
 * Factory stored under the 'EUC-JP' key of `decoders`.
 * @param {{fatal: boolean}} options Passed through to the constructor.
 */
decoders['EUC-JP'] = function (options) {
  var decoder = new EUCJPDecoder(options);
  return decoder;
};
|
|
983
|
-
|
|
1915
|
+
// 13.2 iso-2022-jp
|
|
1916
|
+
// 13.2.1 iso-2022-jp decoder
|
|
1917
|
+
/**
 * iso-2022-jp decoder (13.2.1).
 *
 * The decoder is a state machine: the current state, the last state selected
 * by a valid escape sequence (the "output state"), a pending lead byte and an
 * output flag are kept in closure variables and persist across calls to
 * `handler`.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options `fatal` is forwarded to decoderError().
 */
function ISO2022JPDecoder(options) {
  var fatal = options.fatal;
  /** @enum */
  var states = {
    ASCII: 0,
    Roman: 1,
    Katakana: 2,
    LeadByte: 3,
    TrailByte: 4,
    EscapeStart: 5,
    Escape: 6
  };
  // iso-2022-jp's decoder has an associated iso-2022-jp decoder
  // state (initially ASCII), iso-2022-jp decoder output state
  // (initially ASCII), iso-2022-jp lead (initially 0x00), and
  // iso-2022-jp output flag (initially unset).
  var /** @type {number} */ iso2022jp_decoder_state = states.ASCII,
    /** @type {number} */ iso2022jp_decoder_output_state = states.ASCII,
    /** @type {number} */ iso2022jp_lead = 0x00,
    /** @type {boolean} */ iso2022jp_output_flag = false;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function (stream, bite) {
    // switching on iso-2022-jp decoder state:
    switch (iso2022jp_decoder_state) {
      default:
      case states.ASCII:
        // ASCII
        // Based on byte:

        // 0x1B
        if (bite === 0x1b) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B
        if (inRange(bite, 0x00, 0x7f) && bite !== 0x0e && bite !== 0x0f && bite !== 0x1b) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is byte.
          iso2022jp_output_flag = false;
          return bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.Roman:
        // Roman
        // Based on byte:

        // 0x1B
        if (bite === 0x1b) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x5C
        if (bite === 0x5c) {
          // Unset the iso-2022-jp output flag and return code point
          // U+00A5.
          iso2022jp_output_flag = false;
          return 0x00a5;
        }

        // 0x7E
        if (bite === 0x7e) {
          // Unset the iso-2022-jp output flag and return code point
          // U+203E.
          iso2022jp_output_flag = false;
          return 0x203e;
        }

        // 0x00 to 0x7F, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E
        if (inRange(bite, 0x00, 0x7f) &&
            bite !== 0x0e &&
            bite !== 0x0f &&
            bite !== 0x1b &&
            bite !== 0x5c &&
            bite !== 0x7e) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is byte.
          iso2022jp_output_flag = false;
          return bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.Katakana:
        // Katakana
        // Based on byte:

        // 0x1B
        if (bite === 0x1b) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x21 to 0x5F
        if (inRange(bite, 0x21, 0x5f)) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is 0xFF61 − 0x21 + byte.
          iso2022jp_output_flag = false;
          return 0xff61 - 0x21 + bite;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.LeadByte:
        // Lead byte
        // Based on byte:

        // 0x1B
        if (bite === 0x1b) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          iso2022jp_decoder_state = states.EscapeStart;
          return null;
        }

        // 0x21 to 0x7E
        if (inRange(bite, 0x21, 0x7e)) {
          // Unset the iso-2022-jp output flag, set iso-2022-jp lead
          // to byte, iso-2022-jp decoder state to trail byte, and
          // return continue.
          iso2022jp_output_flag = false;
          iso2022jp_lead = bite;
          iso2022jp_decoder_state = states.TrailByte;
          return null;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished;
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        iso2022jp_output_flag = false;
        return decoderError(fatal);

      case states.TrailByte:
        // Trail byte
        // Based on byte:

        // 0x1B
        if (bite === 0x1b) {
          // Set iso-2022-jp decoder state to escape start and return
          // error: the pending lead byte never received its trail byte.
          iso2022jp_decoder_state = states.EscapeStart;
          return decoderError(fatal);
        }

        // 0x21 to 0x7E
        if (inRange(bite, 0x21, 0x7e)) {
          // 1. Set the iso-2022-jp decoder state to lead byte.
          iso2022jp_decoder_state = states.LeadByte;

          // 2. Let pointer be (iso-2022-jp lead − 0x21) × 94 + byte − 0x21.
          var pointer = (iso2022jp_lead - 0x21) * 94 + bite - 0x21;

          // 3. Let code point be the index code point for pointer in
          // index jis0208.
          var code_point = indexCodePointFor(pointer, index('jis0208'));

          // 4. If code point is null, return error.
          if (code_point === null) {
            return decoderError(fatal);
          }

          // 5. Return a code point whose value is code point.
          return code_point;
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Set the iso-2022-jp decoder state to lead byte, prepend
          // byte to stream, and return error.
          iso2022jp_decoder_state = states.LeadByte;
          stream.prepend(bite);
          return decoderError(fatal);
        }

        // Otherwise
        // Set iso-2022-jp decoder state to lead byte and return
        // error.
        iso2022jp_decoder_state = states.LeadByte;
        return decoderError(fatal);

      case states.EscapeStart:
        // Escape start

        // 1. If byte is either 0x24 or 0x28, set iso-2022-jp lead to
        // byte, iso-2022-jp decoder state to escape, and return
        // continue.
        if (bite === 0x24 || bite === 0x28) {
          iso2022jp_lead = bite;
          iso2022jp_decoder_state = states.Escape;
          return null;
        }

        // 2. Prepend byte to stream.
        stream.prepend(bite);

        // 3. Unset the iso-2022-jp output flag, set iso-2022-jp
        // decoder state to iso-2022-jp decoder output state, and
        // return error.
        iso2022jp_output_flag = false;
        iso2022jp_decoder_state = iso2022jp_decoder_output_state;
        return decoderError(fatal);

      case states.Escape:
        // Escape

        // 1. Let lead be iso-2022-jp lead and set iso-2022-jp lead to
        // 0x00.
        var lead = iso2022jp_lead;
        iso2022jp_lead = 0x00;

        // 2. Let state be null.
        var state = null;

        // 3. If lead is 0x28 and byte is 0x42, set state to ASCII.
        if (lead === 0x28 && bite === 0x42) {
          state = states.ASCII;
        }

        // 4. If lead is 0x28 and byte is 0x4A, set state to Roman.
        if (lead === 0x28 && bite === 0x4a) {
          state = states.Roman;
        }

        // 5. If lead is 0x28 and byte is 0x49, set state to Katakana.
        if (lead === 0x28 && bite === 0x49) {
          state = states.Katakana;
        }

        // 6. If lead is 0x24 and byte is either 0x40 or 0x42, set
        // state to lead byte.
        if (lead === 0x24 && (bite === 0x40 || bite === 0x42)) {
          state = states.LeadByte;
        }

        // 7. If state is non-null, run these substeps:
        if (state !== null) {
          // 1. Set iso-2022-jp decoder state and iso-2022-jp decoder
          // output state to state. The output state has to be recorded
          // here: it is what the decoder falls back to after a malformed
          // escape sequence (see the Escape start and step 9 paths).
          iso2022jp_decoder_state = iso2022jp_decoder_output_state = state;

          // 2. Let output flag be the iso-2022-jp output flag.
          var output_flag = iso2022jp_output_flag;

          // 3. Set the iso-2022-jp output flag.
          iso2022jp_output_flag = true;

          // 4. Return continue, if output flag is unset, and error
          // otherwise.
          return !output_flag ? null : decoderError(fatal);
        }

        // 8. Prepend lead and byte to stream.
        stream.prepend([lead, bite]);

        // 9. Unset the iso-2022-jp output flag, set iso-2022-jp
        // decoder state to iso-2022-jp decoder output state and
        // return error.
        iso2022jp_output_flag = false;
        iso2022jp_decoder_state = iso2022jp_decoder_output_state;
        return decoderError(fatal);
    }
  };
}
|
|
1145
|
-
|
|
2180
|
+
// 13.2.2 iso-2022-jp encoder
|
|
2181
|
+
/**
 * iso-2022-jp encoder (13.2.2).
 *
 * The encoder tracks which character set is currently selected (ASCII, Roman
 * or jis0208) in a closure variable. When a code point needs a different set,
 * the handler pushes the code point back onto the stream, switches state and
 * returns the three-byte escape sequence; the code point is then encoded on
 * the next call.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function ISO2022JPEncoder(options) {
  // Read eagerly; the handler below does not consult this value.
  var fatal = options.fatal;
  // iso-2022-jp's encoder has an associated iso-2022-jp encoder
  // state which is one of ASCII, Roman, and jis0208 (initially
  // ASCII).
  /** @enum */
  var states = {
    ASCII: 0,
    Roman: 1,
    jis0208: 2
  };
  var /** @type {number} */ iso2022jp_state = states.ASCII;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function (stream, code_point) {
    // 1. If code point is end-of-stream and iso-2022-jp encoder
    // state is not ASCII, prepend code point to stream, set
    // iso-2022-jp encoder state to ASCII, and return three bytes
    // 0x1B 0x28 0x42.
    if (code_point === end_of_stream && iso2022jp_state !== states.ASCII) {
      stream.prepend(code_point);
      iso2022jp_state = states.ASCII;
      return [0x1b, 0x28, 0x42];
    }

    // 2. If code point is end-of-stream and iso-2022-jp encoder
    // state is ASCII, return finished.
    if (code_point === end_of_stream && iso2022jp_state === states.ASCII) {
      return finished;
    }

    // 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
    // point is U+000E, U+000F, or U+001B, return error with U+FFFD.
    if ((iso2022jp_state === states.ASCII || iso2022jp_state === states.Roman) &&
        (code_point === 0x000e || code_point === 0x000f || code_point === 0x001b)) {
      return encoderError(0xfffd);
    }

    // 4. If iso-2022-jp encoder state is ASCII and code point is an
    // ASCII code point, return a byte whose value is code point.
    if (iso2022jp_state === states.ASCII && isASCIICodePoint(code_point)) {
      return code_point;
    }

    // 5. If iso-2022-jp encoder state is Roman and code point is an
    // ASCII code point, excluding U+005C and U+007E, or is U+00A5
    // or U+203E, run these substeps:
    if (iso2022jp_state === states.Roman &&
        ((isASCIICodePoint(code_point) && code_point !== 0x005c && code_point !== 0x007e) ||
          code_point === 0x00a5 ||
          code_point === 0x203e)) {
      // 1. If code point is an ASCII code point, return a byte
      // whose value is code point.
      if (isASCIICodePoint(code_point)) {
        return code_point;
      }

      // 2. If code point is U+00A5, return byte 0x5C.
      if (code_point === 0x00a5) {
        return 0x5c;
      }

      // 3. If code point is U+203E, return byte 0x7E.
      if (code_point === 0x203e) {
        return 0x7e;
      }
    }

    // 6. If code point is an ASCII code point, and iso-2022-jp
    // encoder state is not ASCII, prepend code point to stream, set
    // iso-2022-jp encoder state to ASCII, and return three bytes
    // 0x1B 0x28 0x42.
    if (isASCIICodePoint(code_point) && iso2022jp_state !== states.ASCII) {
      stream.prepend(code_point);
      iso2022jp_state = states.ASCII;
      return [0x1b, 0x28, 0x42];
    }

    // 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
    // encoder state is not Roman, prepend code point to stream, set
    // iso-2022-jp encoder state to Roman, and return three bytes
    // 0x1B 0x28 0x4A.
    if ((code_point === 0x00a5 || code_point === 0x203e) && iso2022jp_state !== states.Roman) {
      stream.prepend(code_point);
      iso2022jp_state = states.Roman;
      return [0x1b, 0x28, 0x4a];
    }

    // 8. If code point is U+2212, set it to U+FF0D.
    if (code_point === 0x2212) {
      code_point = 0xff0d;
    }

    // 9. Let pointer be the index pointer for code point in index
    // jis0208.
    var pointer = indexPointerFor(code_point, index('jis0208'));

    // 10. If pointer is null, return error with code point.
    if (pointer === null) {
      return encoderError(code_point);
    }

    // 11. If iso-2022-jp encoder state is not jis0208, prepend code
    // point to stream, set iso-2022-jp encoder state to jis0208,
    // and return three bytes 0x1B 0x24 0x42.
    if (iso2022jp_state !== states.jis0208) {
      stream.prepend(code_point);
      iso2022jp_state = states.jis0208;
      return [0x1b, 0x24, 0x42];
    }

    // 12. Let lead be floor(pointer / 94) + 0x21.
    var lead = floor(pointer / 94) + 0x21;

    // 13. Let trail be pointer % 94 + 0x21.
    var trail = (pointer % 94) + 0x21;

    // 14. Return two bytes whose values are lead and trail.
    return [lead, trail];
  };
}
|
|
1203
|
-
|
|
2288
|
+
/**
 * Factory stored under the 'ISO-2022-JP' key of `encoders`.
 * @param {{fatal: boolean}} options Passed through to the constructor.
 */
encoders['ISO-2022-JP'] = function (options) {
  var encoder = new ISO2022JPEncoder(options);
  return encoder;
};

/**
 * Factory stored under the 'ISO-2022-JP' key of `decoders`.
 * @param {{fatal: boolean}} options Passed through to the constructor.
 */
decoders['ISO-2022-JP'] = function (options) {
  var decoder = new ISO2022JPDecoder(options);
  return decoder;
};
|
|
1211
|
-
|
|
2296
|
+
// 13.3 Shift_JIS
|
|
2297
|
+
// 13.3.1 Shift_JIS decoder
|
|
2298
|
+
/**
 * Shift_JIS decoder (13.3.1). A pending lead byte is remembered between
 * handler calls in a closure variable (0x00 means "no lead pending").
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options `fatal` is forwarded to decoderError().
 */
function ShiftJISDecoder(options) {
  var fatal = options.fatal;
  // Shift_JIS lead, initially 0x00.
  var /** @type {number} */ Shift_JIS_lead = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function (stream, bite) {
    var pendingLead = Shift_JIS_lead;

    // Steps 1-2: at end-of-stream a dangling lead byte is an error (and is
    // cleared); otherwise decoding is finished.
    if (bite === end_of_stream) {
      if (pendingLead === 0x00) {
        return finished;
      }
      Shift_JIS_lead = 0x00;
      return decoderError(fatal);
    }

    // Step 3: `bite` is the trail byte for a previously stored lead byte.
    if (pendingLead !== 0x00) {
      Shift_JIS_lead = 0x00;

      // 3.1 Trail offset is 0x40 below 0x7F and 0x41 otherwise.
      var trailOffset = bite < 0x7f ? 0x40 : 0x41;
      // 3.2 Lead offset is 0x81 below 0xA0 and 0xC1 otherwise.
      var leadOffset = pendingLead < 0xa0 ? 0x81 : 0xc1;

      // 3.3 Only trail bytes in 0x40..0x7E or 0x80..0xFC yield a pointer.
      var pointer = null;
      if (inRange(bite, 0x40, 0x7e) || inRange(bite, 0x80, 0xfc)) {
        pointer = (pendingLead - leadOffset) * 188 + bite - trailOffset;
      }

      // 3.4 Pointers 8836..10715 map linearly onto code points from 0xE000.
      if (inRange(pointer, 8836, 10715)) {
        return 0xe000 - 8836 + pointer;
      }

      // 3.5 Otherwise look the pointer up in index jis0208.
      var code_point = null;
      if (pointer !== null) {
        code_point = indexCodePointFor(pointer, index('jis0208'));
      }

      // 3.8 A successful lookup is returned as-is.
      if (code_point !== null) {
        return code_point;
      }

      // 3.6-3.7 On failure an ASCII trail byte is pushed back onto the
      // stream before the error is reported.
      if (isASCIIByte(bite)) {
        stream.prepend(bite);
      }
      return decoderError(fatal);
    }

    // Step 4: ASCII bytes and 0x80 decode to themselves.
    if (isASCIIByte(bite) || bite === 0x80) {
      return bite;
    }

    // Step 5: 0xA1..0xDF decode to 0xFF61 − 0xA1 + byte.
    if (inRange(bite, 0xa1, 0xdf)) {
      return 0xff61 - 0xa1 + bite;
    }

    // Step 6: 0x81..0x9F and 0xE0..0xFC are lead bytes; store and continue.
    if (inRange(bite, 0x81, 0x9f) || inRange(bite, 0xe0, 0xfc)) {
      Shift_JIS_lead = bite;
      return null;
    }

    // Step 7: anything else is an error.
    return decoderError(fatal);
  };
}
|
|
1249
|
-
|
|
2380
|
+
// 13.3.2 Shift_JIS encoder
|
|
2381
|
+
/**
 * Shift_JIS encoder (13.3.2). Installs a `handler` that turns one code point
 * at a time into the byte, or byte pair, to emit.
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options
 */
function ShiftJISEncoder(options) {
  // Read eagerly so a missing `options` fails at construction time; the
  // handler below does not consult this value.
  var fatal = options.fatal;

  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function (stream, code_point) {
    // Step 1: end-of-stream finishes the encoder.
    if (code_point === end_of_stream) {
      return finished;
    }

    // Step 2: ASCII code points and U+0080 are emitted unchanged.
    if (isASCIICodePoint(code_point) || code_point === 0x0080) {
      return code_point;
    }

    // Steps 3-4: U+00A5 and U+203E map to single bytes.
    switch (code_point) {
      case 0x00a5:
        return 0x5c;
      case 0x203e:
        return 0x7e;
    }

    // Step 5: U+FF61..U+FF9F map to a single byte starting at 0xA1.
    if (inRange(code_point, 0xff61, 0xff9f)) {
      return code_point - 0xff61 + 0xa1;
    }

    // Step 6: U+2212 is looked up (and reported on error) as U+FF0D.
    var lookupPoint = code_point === 0x2212 ? 0xff0d : code_point;

    // Steps 7-8: find the Shift_JIS pointer; no pointer is an error.
    var sjisPointer = indexShiftJISPointerFor(lookupPoint);
    if (sjisPointer === null) {
      return encoderError(lookupPoint);
    }

    // Steps 9-10: lead row and its offset (0x81 below 0x1F, else 0xC1).
    var leadRow = floor(sjisPointer / 188);
    var leadByte = leadRow + (leadRow < 0x1f ? 0x81 : 0xc1);

    // Steps 11-12: trail cell and its offset (0x40 below 0x3F, else 0x41).
    var trailCell = sjisPointer % 188;
    var trailByte = trailCell + (trailCell < 0x3f ? 0x40 : 0x41);

    // Step 13: emit the two bytes.
    return [leadByte, trailByte];
  };
}
|
|
1269
|
-
|
|
2434
|
+
/**
 * Factory stored under the 'Shift_JIS' key of `encoders`.
 * @param {{fatal: boolean}} options Passed through to the constructor.
 */
encoders['Shift_JIS'] = function (options) {
  var encoder = new ShiftJISEncoder(options);
  return encoder;
};

/**
 * Factory stored under the 'Shift_JIS' key of `decoders`.
 * @param {{fatal: boolean}} options Passed through to the constructor.
 */
decoders['Shift_JIS'] = function (options) {
  var decoder = new ShiftJISDecoder(options);
  return decoder;
};
|
|
1277
|
-
|
|
2442
|
+
//
|
|
2443
|
+
// 14. Legacy multi-byte Korean encodings
|
|
2444
|
+
//
|
|
2445
|
+
// 14.1 euc-kr
|
|
2446
|
+
// 14.1.1 euc-kr decoder
|
|
2447
|
+
/**
 * euc-kr decoder (14.1.1). A pending lead byte is remembered between handler
 * calls in a closure variable (0x00 means "no lead pending").
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options `fatal` is forwarded to decoderError().
 */
function EUCKRDecoder(options) {
  var fatal = options.fatal;

  // euc-kr's decoder has an associated euc-kr lead (initially 0x00).
  var /** @type {number} */ euckr_lead = 0x00;

  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function (stream, bite) {
    // 1. If byte is end-of-stream and euc-kr lead is not 0x00, set
    // euc-kr lead to 0x00 and return error.
    if (bite === end_of_stream && euckr_lead !== 0x00) {
      euckr_lead = 0x00;
      return decoderError(fatal);
    }

    // 2. If byte is end-of-stream and euc-kr lead is 0x00, return
    // finished.
    if (bite === end_of_stream && euckr_lead === 0x00) {
      return finished;
    }

    // 3. If euc-kr lead is not 0x00, let lead be euc-kr lead, let
    // pointer be null, set euc-kr lead to 0x00, and then run these
    // substeps:
    if (euckr_lead !== 0x00) {
      var lead = euckr_lead;
      var pointer = null;
      euckr_lead = 0x00;

      // 1. If byte is in the range 0x41 to 0xFE, inclusive, set
      // pointer to (lead − 0x81) × 190 + (byte − 0x41).
      if (inRange(bite, 0x41, 0xfe)) {
        pointer = (lead - 0x81) * 190 + (bite - 0x41);
      }

      // 2. Let code point be null, if pointer is null, and the
      // index code point for pointer in index euc-kr otherwise.
      var code_point = pointer === null ? null : indexCodePointFor(pointer, index('euc-kr'));

      // 3. If code point is null and byte is an ASCII byte, prepend
      // byte to stream. The test must be on the code point, not the
      // pointer: an ASCII trail byte in 0x41..0x7F always produces a
      // pointer, but that pointer may have no entry in the index, and the
      // byte still has to be re-read as ASCII rather than swallowed.
      if (code_point === null && isASCIIByte(bite)) {
        stream.prepend(bite);
      }

      // 4. If code point is null, return error.
      if (code_point === null) {
        return decoderError(fatal);
      }

      // 5. Return a code point whose value is code point.
      return code_point;
    }

    // 4. If byte is an ASCII byte, return a code point whose value
    // is byte.
    if (isASCIIByte(bite)) {
      return bite;
    }

    // 5. If byte is in the range 0x81 to 0xFE, inclusive, set
    // euc-kr lead to byte and return continue.
    if (inRange(bite, 0x81, 0xfe)) {
      euckr_lead = bite;
      return null;
    }

    // 6. Return error.
    return decoderError(fatal);
  };
}
|
|
1311
|
-
|
|
2512
|
+
// 14.1.2 euc-kr encoder
|
|
2513
|
+
/**
 * Encoder for the legacy euc-kr encoding (section 14.1.2).
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options Accepted for parity with the other
 *     encoder constructors; this encoder does not read it.
 */
function EUCKREncoder(options) {
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function (stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream) {
      return finished;
    }

    // 2. An ASCII code point is emitted as a single byte of the same value.
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }

    // 3. Let pointer be the index pointer for code point in index euc-kr.
    var pointer = indexPointerFor(code_point, index('euc-kr'));

    // 4. If pointer is null, return error with code point.
    if (pointer === null) {
      return encoderError(code_point);
    }

    // 5-7. lead = floor(pointer / 190) + 0x81, trail = pointer % 190 + 0x41;
    // emit both bytes, lead first.
    return [floor(pointer / 190) + 0x81, (pointer % 190) + 0x41];
  };
}
|
|
1325
|
-
|
|
2547
|
+
// Register the euc-kr encoder factory under its canonical encoding name.
/** @param {{fatal: boolean}} options */
encoders['EUC-KR'] = function (options) {
  return new EUCKREncoder(options);
};

// Register the euc-kr decoder factory under its canonical encoding name.
/** @param {{fatal: boolean}} options */
decoders['EUC-KR'] = function (options) {
  return new EUCKRDecoder(options);
};
|
|
1333
|
-
|
|
2555
|
+
//
|
|
2556
|
+
// 15. Legacy miscellaneous encodings
|
|
2557
|
+
//
|
|
2558
|
+
// 15.1 replacement
|
|
2559
|
+
// Not needed - API throws RangeError
|
|
2560
|
+
// 15.2 Common infrastructure for utf-16be and utf-16le
|
|
2561
|
+
/**
 * Splits a 16-bit code unit into its two bytes in the requested byte order.
 *
 * @param {number} code_unit The code unit to split.
 * @param {boolean} utf16be True for big-endian order (high byte first),
 *     false for little-endian order (low byte first).
 * @return {!Array.<number>} bytes
 */
function convertCodeUnitToBytes(code_unit, utf16be) {
  // 1. Let byte1 be code unit >> 8.
  var high = code_unit >> 8;
  // 2. Let byte2 be code unit & 0x00FF.
  var low = code_unit & 0x00ff;
  // 3. utf-16be flag set: byte1 then byte2; unset: byte2 then byte1.
  return utf16be ? [high, low] : [low, high];
}
|
|
1340
|
-
|
|
2578
|
+
// 15.2.1 shared utf-16 decoder
|
|
2579
|
+
/**
 * Shared decoder for utf-16be and utf-16le (section 15.2.1).
 *
 * Bytes are fed one at a time to `handler`. Closure state holds the first
 * byte of a pending code unit and a pending lead surrogate.
 *
 * @constructor
 * @implements {Decoder}
 * @param {boolean} utf16_be True if big-endian, false if little-endian.
 * @param {{fatal: boolean}} options `fatal` is forwarded to `decoderError`.
 */
function UTF16Decoder(utf16_be, options) {
  var fatal = options.fatal;
  var /** @type {?number} */ utf16_lead_byte = null;
  var /** @type {?number} */ utf16_lead_surrogate = null;
  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function (stream, bite) {
    if (bite === end_of_stream) {
      // 2. Nothing pending at end-of-stream: return finished.
      if (utf16_lead_byte === null && utf16_lead_surrogate === null) {
        return finished;
      }
      // 1. Pending state at end-of-stream: clear both pieces of state and
      // return error. Without the reset, a decoder instance that is called
      // again would keep reporting the same error instead of finishing.
      utf16_lead_byte = null;
      utf16_lead_surrogate = null;
      return decoderError(fatal);
    }

    // 3. First byte of a code unit: store it and continue.
    if (utf16_lead_byte === null) {
      utf16_lead_byte = bite;
      return null;
    }

    // 4. Combine the stored byte with this one according to byte order,
    // then clear the stored byte.
    var code_unit = utf16_be ? (utf16_lead_byte << 8) + bite : (bite << 8) + utf16_lead_byte;
    utf16_lead_byte = null;

    // 5. A lead surrogate is pending: this code unit must be its trail.
    if (utf16_lead_surrogate !== null) {
      var lead_surrogate = utf16_lead_surrogate;
      utf16_lead_surrogate = null;

      // 5.1 Trail surrogate (U+DC00..U+DFFF): combine into one code point.
      if (inRange(code_unit, 0xdc00, 0xdfff)) {
        return 0x10000 + (lead_surrogate - 0xd800) * 0x400 + (code_unit - 0xdc00);
      }

      // 5.2 Not a trail surrogate: push the code unit's bytes back onto the
      // stream so they are decoded again, and return error for the lone lead.
      stream.prepend(convertCodeUnitToBytes(code_unit, utf16_be));
      return decoderError(fatal);
    }

    // 6. Lead surrogate (U+D800..U+DBFF): remember it and continue.
    if (inRange(code_unit, 0xd800, 0xdbff)) {
      utf16_lead_surrogate = code_unit;
      return null;
    }

    // 7. A trail surrogate with no preceding lead is an error.
    if (inRange(code_unit, 0xdc00, 0xdfff)) {
      return decoderError(fatal);
    }

    // 8. Any other code unit is the code point itself.
    return code_unit;
  };
}
|
|
1391
|
-
|
|
2660
|
+
// 15.2.2 shared utf-16 encoder
|
|
2661
|
+
/**
 * Shared encoder for utf-16be and utf-16le (section 15.2.2).
 *
 * @constructor
 * @implements {Encoder}
 * @param {boolean} utf16_be True if big-endian, false if little-endian.
 * @param {{fatal: boolean}} options Accepted for parity with the other
 *     encoder constructors; this encoder does not read it.
 */
function UTF16Encoder(utf16_be, options) {
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function (stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream) {
      return finished;
    }

    // 2. A code point in U+0000..U+FFFF is a single code unit.
    if (inRange(code_point, 0x0000, 0xffff)) {
      return convertCodeUnitToBytes(code_point, utf16_be);
    }

    // 3-4. Otherwise split into a surrogate pair:
    // lead = ((code point - 0x10000) >> 10) + 0xD800,
    // trail = ((code point - 0x10000) & 0x3FF) + 0xDC00.
    var offset = code_point - 0x10000;
    var lead = convertCodeUnitToBytes((offset >> 10) + 0xd800, utf16_be);
    var trail = convertCodeUnitToBytes((offset & 0x3ff) + 0xdc00, utf16_be);

    // 5. Return the bytes of lead followed by the bytes of trail.
    return lead.concat(trail);
  };
}
|
|
1403
|
-
|
|
2693
|
+
// 15.3 utf-16be
// 15.3.2 utf-16be encoder
/** @param {{fatal: boolean}} options */
encoders['UTF-16BE'] = function (options) {
  return new UTF16Encoder(true, options);
};
// 15.3.1 utf-16be decoder
/** @param {{fatal: boolean}} options */
decoders['UTF-16BE'] = function (options) {
  return new UTF16Decoder(true, options);
};
// 15.4 utf-16le
// 15.4.2 utf-16le encoder
/** @param {{fatal: boolean}} options */
encoders['UTF-16LE'] = function (options) {
  return new UTF16Encoder(false, options);
};
// 15.4.1 utf-16le decoder
/** @param {{fatal: boolean}} options */
decoders['UTF-16LE'] = function (options) {
  return new UTF16Decoder(false, options);
};
|
|
1419
|
-
|
|
2715
|
+
// 15.5 x-user-defined
|
|
2716
|
+
// 15.5.1 x-user-defined decoder
|
|
2717
|
+
/**
 * Decoder for the x-user-defined encoding (section 15.5.1).
 *
 * The handler has no error path, so the `fatal` option is never consulted.
 *
 * @constructor
 * @implements {Decoder}
 * @param {{fatal: boolean}} options Accepted for parity with the other
 *     decoder constructors; this decoder does not read it.
 */
function XUserDefinedDecoder(options) {
  /**
   * @param {Stream} stream The stream of bytes being decoded.
   * @param {number} bite The next byte read from the stream.
   * @return {?(number|!Array.<number>)} The next code point(s)
   *     decoded, or null if not enough data exists in the input
   *     stream to decode a complete code point.
   */
  this.handler = function (stream, bite) {
    // 1. If byte is end-of-stream, return finished.
    if (bite === end_of_stream) {
      return finished;
    }

    // 2. An ASCII byte decodes to itself.
    if (isASCIIByte(bite)) {
      return bite;
    }

    // 3. Any other byte maps to 0xF780 + byte - 0x80.
    return 0xf780 + bite - 0x80;
  };
}
|
|
1429
|
-
|
|
2743
|
+
// 15.5.2 x-user-defined encoder
|
|
2744
|
+
/**
 * Encoder for the x-user-defined encoding (section 15.5.2).
 *
 * @constructor
 * @implements {Encoder}
 * @param {{fatal: boolean}} options Accepted for parity with the other
 *     encoder constructors; this encoder does not read it.
 */
function XUserDefinedEncoder(options) {
  /**
   * @param {Stream} stream Input stream.
   * @param {number} code_point Next code point read from the stream.
   * @return {(number|!Array.<number>)} Byte(s) to emit.
   */
  this.handler = function (stream, code_point) {
    // 1. If code point is end-of-stream, return finished.
    if (code_point === end_of_stream) {
      return finished;
    }

    // 2. An ASCII code point is emitted as a single byte of the same value.
    if (isASCIICodePoint(code_point)) {
      return code_point;
    }

    // 3. U+F780..U+F7FF maps to the byte code point - 0xF780 + 0x80.
    if (inRange(code_point, 0xf780, 0xf7ff)) {
      return code_point - 0xf780 + 0x80;
    }

    // 4. Everything else cannot be represented: return error with code point.
    return encoderError(code_point);
  };
}
|
|
1440
|
-
|
|
2772
|
+
// Register the x-user-defined encoder factory under its encoding name.
/** @param {{fatal: boolean}} options */
encoders['x-user-defined'] = function (options) {
  return new XUserDefinedEncoder(options);
};

// Register the x-user-defined decoder factory under its encoding name.
/** @param {{fatal: boolean}} options */
decoders['x-user-defined'] = function (options) {
  return new XUserDefinedDecoder(options);
};
|
|
1448
|
-
|
|
1449
|
-
export { TextEncoder, TextDecoder };
|
|
1450
|
-
//# sourceMappingURL=encoding.js.map
|