hama-js 1.3.2 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
@@ -1,23 +1,99 @@
1
1
  import { InferenceSession, Tensor } from "onnxruntime-web";
2
- import { decodeIdsToResult, encodeText } from "./tokenizer.js";
2
+ import { decodeIdsToResult, decoderIds, encodeText } from "./tokenizer.js";
3
3
  // Default asset URLs, each resolved relative to this module through `import.meta.url`:
  // the single-graph model, followed by the encoder / decoder-step pair that
  // `G2PBrowserModel.create` tries to load as split sessions.
  const DEFAULT_MODEL_URL = new URL("./assets/g2p_fp16.onnx", import.meta.url).toString();
4
+ const DEFAULT_ENCODER_URL = new URL("./assets/encoder.onnx", import.meta.url).toString();
5
+ const DEFAULT_DECODER_STEP_URL = new URL("./assets/decoder_step.onnx", import.meta.url).toString();
6
/**
 * Picks the tensor name to use from the names a session actually exposes.
 *
 * Resolution order:
 *   1. `primary`, if present verbatim;
 *   2. the first entry of `fallbacks` that is present verbatim;
 *   3. a name that merely starts with `primary` — when several qualify, a single
 *      `primary.`-prefixed name wins, otherwise the first prefixed name is used.
 *
 * @param {readonly string[]} available - Names exposed by the session.
 * @param {string} primary - Preferred tensor name.
 * @param {...string} fallbacks - Alternative exact names, in priority order.
 * @returns {string} The resolved name.
 * @throws {Error} If no exact or prefixed candidate exists.
 */
const resolveName = (available, primary, ...fallbacks) => {
    const exact = [primary, ...fallbacks].find((candidate) => available.includes(candidate));
    if (exact !== undefined) {
        return exact;
    }
    // Any name starting with `${primary}.` also starts with `primary`, so one prefix test suffices.
    const prefixed = available.filter((name) => name.startsWith(primary));
    if (prefixed.length === 0) {
        throw new Error(`Could not resolve ONNX tensor name for '${primary}'. Available: ${available.join(", ")}`);
    }
    const dotted = prefixed.filter((name) => name.startsWith(`${primary}.`));
    return dotted.length === 1 ? dotted[0] : prefixed[0];
};
4
24
  export class G2PBrowserModel {
5
25
  // Stores the given options object on the instance as `this.options`; nothing else is set up here.
  constructor(options) {
6
26
  this.options = options;
7
27
  }
8
28
  static async create(options = {}) {
29
+ if ((options.encoderUrl === undefined) !== (options.decoderStepUrl === undefined)) {
30
+ throw new Error("encoderUrl and decoderStepUrl must be provided together");
31
+ }
9
32
  const opts = {
10
33
  modelUrl: options.modelUrl ?? DEFAULT_MODEL_URL,
34
+ encoderUrl: options.encoderUrl ?? DEFAULT_ENCODER_URL,
35
+ decoderStepUrl: options.decoderStepUrl ?? DEFAULT_DECODER_STEP_URL,
11
36
  maxInputLen: options.maxInputLen ?? 128,
37
+ // Retained for API compatibility; autoregressive ONNX sets output length in-graph.
12
38
  maxOutputLen: options.maxOutputLen ?? 32,
13
39
  };
14
40
  const model = new G2PBrowserModel(opts);
15
- model.session = await InferenceSession.create(opts.modelUrl, {
16
- executionProviders: ["wasm"],
17
- });
41
+ const useExplicitSplit = options.encoderUrl !== undefined && options.decoderStepUrl !== undefined;
42
+ if (useExplicitSplit) {
43
+ const [encoderSession, decoderStepSession] = await Promise.all([
44
+ InferenceSession.create(opts.encoderUrl, { executionProviders: ["wasm"] }),
45
+ InferenceSession.create(opts.decoderStepUrl, { executionProviders: ["wasm"] }),
46
+ ]);
47
+ model.encoderSession = encoderSession;
48
+ model.decoderStepSession = decoderStepSession;
49
+ return model;
50
+ }
51
+ try {
52
+ const [encoderSession, decoderStepSession] = await Promise.all([
53
+ InferenceSession.create(opts.encoderUrl, { executionProviders: ["wasm"] }),
54
+ InferenceSession.create(opts.decoderStepUrl, { executionProviders: ["wasm"] }),
55
+ ]);
56
+ model.encoderSession = encoderSession;
57
+ model.decoderStepSession = decoderStepSession;
58
+ }
59
+ catch {
60
+ model.session = await InferenceSession.create(opts.modelUrl, {
61
+ executionProviders: ["wasm"],
62
+ });
63
+ }
18
64
  return model;
19
65
  }
20
- async predict(text) {
66
+ async predict(text, options = {}) {
67
+ const splitDelimiter = options.splitDelimiter ?? /\s+/u;
68
+ const outputDelimiter = options.outputDelimiter ?? " ";
69
+ const segments = splitSegments(text, splitDelimiter);
70
+ if (segments.length === 0) {
71
+ return this.predictSingle(text, 0);
72
+ }
73
+ const results = await Promise.all(segments.map(async (segment) => this.predictSingle(segment.text, codePointOffset(text, segment.startCodeUnit))));
74
+ const ipaParts = [];
75
+ const alignments = [];
76
+ for (let i = 0; i < results.length; i++) {
77
+ if (i > 0)
78
+ ipaParts.push(outputDelimiter);
79
+ ipaParts.push(results[i].ipa);
80
+ for (const alignment of results[i].alignments) {
81
+ alignments.push({
82
+ phoneme: alignment.phoneme,
83
+ phonemeIndex: alignments.length,
84
+ charIndex: alignment.charIndex,
85
+ });
86
+ }
87
+ }
88
+ return { ipa: ipaParts.join(""), alignments };
89
+ }
90
+ async predictSingle(text, baseCharIndex) {
91
+ if (this.encoderSession && this.decoderStepSession) {
92
+ return this.predictSingleSplit(text, baseCharIndex);
93
+ }
94
+ if (!this.session) {
95
+ throw new Error("No ONNX session initialized");
96
+ }
21
97
  const encoded = encodeText(text, this.options.maxInputLen);
22
98
  const inputIds = BigInt64Array.from(encoded.ids);
23
99
  const inputLengths = new BigInt64Array([BigInt(encoded.length || 1)]);
@@ -28,7 +104,149 @@ export class G2PBrowserModel {
28
104
  const outputs = await this.session.run(feeds);
29
105
  const decoded = outputs.decoded_ids.data;
30
106
  const attn = outputs.attn_indices.data;
31
- return decodeIdsToResult(decoded, attn, encoded.positionMap);
107
+ const result = decodeIdsToResult(decoded, attn, encoded.positionMap);
108
+ return {
109
+ ipa: result.ipa,
110
+ alignments: result.alignments.map((alignment, idx) => ({
111
+ phoneme: alignment.phoneme,
112
+ phonemeIndex: idx,
113
+ charIndex: alignment.charIndex < 0 ? alignment.charIndex : alignment.charIndex + baseCharIndex,
114
+ })),
115
+ };
116
+ }
117
+ async predictSingleSplit(text, baseCharIndex) {
118
+ if (!this.encoderSession || !this.decoderStepSession) {
119
+ throw new Error("Split ONNX sessions are not initialized");
120
+ }
121
+ const encoded = encodeText(text, this.options.maxInputLen);
122
+ const inputIds = BigInt64Array.from(encoded.ids);
123
+ const inputLengths = new BigInt64Array([BigInt(encoded.length || 1)]);
124
+ const encoderFeeds = {
125
+ input_ids: new Tensor("int64", inputIds, [1, this.options.maxInputLen]),
126
+ input_lengths: new Tensor("int64", inputLengths, [1]),
127
+ };
128
+ const encoderOutputs = await this.encoderSession.run(encoderFeeds);
129
+ const encoderOutputNames = this.encoderSession.outputNames;
130
+ const decoderInputNames = this.decoderStepSession.inputNames;
131
+ const decoderOutputNames = this.decoderStepSession.outputNames;
132
+ const encNames = {
133
+ encoder_outputs: resolveName(encoderOutputNames, "encoder_outputs"),
134
+ projected_keys: resolveName(encoderOutputNames, "projected_keys"),
135
+ encoder_mask: resolveName(encoderOutputNames, "encoder_mask"),
136
+ hidden: resolveName(encoderOutputNames, "hidden"),
137
+ prev_attn: resolveName(encoderOutputNames, "prev_attn"),
138
+ };
139
+ const decIn = {
140
+ decoder_input_ids: resolveName(decoderInputNames, "decoder_input_ids"),
141
+ encoder_outputs: resolveName(decoderInputNames, "encoder_outputs"),
142
+ projected_keys: resolveName(decoderInputNames, "projected_keys"),
143
+ encoder_mask: resolveName(decoderInputNames, "encoder_mask"),
144
+ prev_attn: resolveName(decoderInputNames, "prev_attn", "prev_attn_in"),
145
+ hidden: resolveName(decoderInputNames, "hidden", "hidden_in"),
146
+ positions: resolveName(decoderInputNames, "positions"),
147
+ };
148
+ const decOut = {
149
+ next_token_ids: resolveName(decoderOutputNames, "next_token_ids"),
150
+ hidden: resolveName(decoderOutputNames, "hidden_out", "hidden"),
151
+ prev_attn: resolveName(decoderOutputNames, "prev_attn_out", "prev_attn"),
152
+ attn_argmax: resolveName(decoderOutputNames, "attn_argmax"),
153
+ };
154
+ const encoderStates = {
155
+ encoder_outputs: encoderOutputs[encNames.encoder_outputs],
156
+ projected_keys: encoderOutputs[encNames.projected_keys],
157
+ encoder_mask: encoderOutputs[encNames.encoder_mask],
158
+ hidden: encoderOutputs[encNames.hidden],
159
+ prev_attn: encoderOutputs[encNames.prev_attn],
160
+ };
161
+ const srcLen = Number(encoderStates.encoder_outputs.dims[1] ?? 0);
162
+ const positions = new Float32Array(srcLen);
163
+ for (let i = 0; i < srcLen; i++)
164
+ positions[i] = i;
165
+ const positionsTensor = new Tensor("float32", positions, [1, srcLen]);
166
+ let decoderInput = new Tensor("int64", new BigInt64Array([BigInt(decoderIds.sos)]), [1, 1]);
167
+ let hidden = encoderStates.hidden;
168
+ let prevAttn = encoderStates.prev_attn;
169
+ const decoded = [];
170
+ const attnIndices = [];
171
+ for (let step = 0; step < this.options.maxOutputLen; step++) {
172
+ const stepOutputs = await this.decoderStepSession.run({
173
+ [decIn.decoder_input_ids]: decoderInput,
174
+ [decIn.encoder_outputs]: encoderStates.encoder_outputs,
175
+ [decIn.projected_keys]: encoderStates.projected_keys,
176
+ [decIn.encoder_mask]: encoderStates.encoder_mask,
177
+ [decIn.prev_attn]: prevAttn,
178
+ [decIn.hidden]: hidden,
179
+ [decIn.positions]: positionsTensor,
180
+ });
181
+ const nextToken = firstInt64(stepOutputs[decOut.next_token_ids]);
182
+ const attnIdx = firstInt64(stepOutputs[decOut.attn_argmax]);
183
+ decoded.push(nextToken);
184
+ attnIndices.push(attnIdx);
185
+ hidden = stepOutputs[decOut.hidden];
186
+ prevAttn = stepOutputs[decOut.prev_attn];
187
+ decoderInput = new Tensor("int64", new BigInt64Array([nextToken]), [1, 1]);
188
+ if (nextToken === BigInt(decoderIds.eos)) {
189
+ break;
190
+ }
191
+ }
192
+ const result = decodeIdsToResult(decoded, attnIndices, encoded.positionMap);
193
+ return {
194
+ ipa: result.ipa,
195
+ alignments: result.alignments.map((alignment, idx) => ({
196
+ phoneme: alignment.phoneme,
197
+ phonemeIndex: idx,
198
+ charIndex: alignment.charIndex < 0 ? alignment.charIndex : alignment.charIndex + baseCharIndex,
199
+ })),
200
+ };
32
201
  }
33
202
  }
203
/**
 * Reads element 0 of a tensor's `data` as a BigInt.
 *
 * @param {{data: ArrayLike<bigint|number>}} tensor - Object exposing indexable `data`.
 * @returns {bigint} The first element; non-bigint values are converted with
 *   `Number` and truncated toward zero first.
 */
const firstInt64 = (tensor) => {
    const head = tensor.data[0];
    if (typeof head === "bigint") {
        return head;
    }
    return BigInt(Math.trunc(Number(head)));
};
208
/**
 * Backslash-escapes every regular-expression syntax character in `value` so the
 * result matches the original text literally when used as a pattern.
 *
 * @param {string} value - Literal text.
 * @returns {string} Pattern source matching `value` verbatim.
 */
const escapeRegex = (value) => {
    const escaped = value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return escaped;
};
209
/**
 * Normalizes a split delimiter into a global, Unicode-aware RegExp.
 *
 * A string delimiter is matched literally. A RegExp keeps its source and flags and
 * gains `g`, plus `u` unless it already uses `u` or `v` (the two are mutually
 * exclusive, so adding `u` to a `v` regex would throw a SyntaxError).
 *
 * @param {string|RegExp} delimiter - Delimiter to normalize.
 * @returns {RegExp} A fresh RegExp with the `g` flag and a Unicode mode flag.
 * @throws {Error} If `delimiter` is an empty string.
 */
const toGlobalRegex = (delimiter) => {
    if (typeof delimiter === "string") {
        if (delimiter.length === 0) {
            throw new Error("splitDelimiter must not be an empty string");
        }
        return new RegExp(escapeRegex(delimiter), "gu");
    }
    let flags = delimiter.flags;
    if (!flags.includes("g")) {
        flags += "g";
    }
    // `v` (unicodeSets) already implies Unicode semantics and cannot be combined with `u`.
    if (!flags.includes("u") && !flags.includes("v")) {
        flags += "u";
    }
    return new RegExp(delimiter.source, flags);
};
219
/**
 * Cuts `text` into the non-empty pieces that lie between delimiter matches.
 *
 * @param {string} text - Text to split.
 * @param {string|RegExp|null} delimiter - Separator; `null` returns the whole text as one segment.
 * @returns {Array<{text: string, startCodeUnit: number}>} Segments in order, each with
 *   the UTF-16 code-unit offset at which it starts in `text`.
 * @throws {Error} If the delimiter matches the empty string.
 */
const splitSegments = (text, delimiter) => {
    if (delimiter === null) {
        return [{ text, startCodeUnit: 0 }];
    }
    const regex = toGlobalRegex(delimiter);
    if (regex.test("")) {
        throw new Error("splitDelimiter must not match an empty string");
    }
    // `test` on a global regex is stateful; rewind before scanning.
    regex.lastIndex = 0;
    const pieces = [];
    // Records text[from, to) unless the range is empty.
    const collect = (from, to) => {
        if (to > from) {
            pieces.push({ text: text.slice(from, to), startCodeUnit: from });
        }
    };
    let cursor = 0;
    for (const match of text.matchAll(regex)) {
        const matchStart = match.index ?? 0;
        collect(cursor, matchStart);
        cursor = matchStart + match[0].length;
    }
    collect(cursor, text.length);
    return pieces;
};
242
/**
 * Translates a UTF-16 code-unit offset into a count of code points.
 *
 * @param {string} text - Source string.
 * @param {number} codeUnitOffset - Offset measured in UTF-16 code units.
 * @returns {number} Number of code points stepped over before reaching the offset.
 */
const codePointOffset = (text, codeUnitOffset) => {
    let count = 0;
    for (let unit = 0; unit < codeUnitOffset; count += 1) {
        // Astral code points occupy two code units (a surrogate pair).
        const codePoint = text.codePointAt(unit) ?? 0;
        unit += codePoint > 0xffff ? 2 : 1;
    }
    return count;
};
34
252
  //# sourceMappingURL=browser.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"browser.js","sourceRoot":"","sources":["../../src/browser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAE3D,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAa,MAAM,gBAAgB,CAAC;AAQ1E,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,wBAAwB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC;AAExF,MAAM,OAAO,eAAe;IAI1B,YAAoB,OAAiC;QACnD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,UAA0B,EAAE;QAC9C,MAAM,IAAI,GAA6B;YACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,iBAAiB;YAC/C,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,GAAG;YACvC,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,EAAE;SACzC,CAAC;QACF,MAAM,KAAK,GAAG,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC;QACxC,KAAK,CAAC,OAAO,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE;YAC3D,kBAAkB,EAAE,CAAC,MAAM,CAAC;SAC7B,CAAC,CAAC;QACH,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,IAAY;QACxB,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,IAAI,aAAa,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAEtE,MAAM,KAAK,GAA2B;YACpC,SAAS,EAAE,IAAI,MAAM,CAAC,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YACvE,aAAa,EAAE,IAAI,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;SACtD,CAAC;QAEF,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAC9C,MAAM,OAAO,GAAG,OAAO,CAAC,WAAW,CAAC,IAAqB,CAAC;QAC1D,MAAM,IAAI,GAAG,OAAO,CAAC,YAAY,CAAC,IAAqB,CAAC;QACxD,OAAO,iBAAiB,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;IAC/D,CAAC;CACF"}
1
+ {"version":3,"file":"browser.js","sourceRoot":"","sources":["../../src/browser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAE3D,OAAO,EAAE,iBAAiB,EAAE,UAAU,EAAE,UAAU,EAAa,MAAM,gBAAgB,CAAC;AAetF,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,wBAAwB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC;AACxF,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC,uBAAuB,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC;AACzF,MAAM,wBAAwB,GAAG,IAAI,GAAG,CAAC,4BAA4B,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,CAAC;AAEnG,MAAM,WAAW,GAAG,CAAC,SAA4B,EAAE,OAAe,EAAE,GAAG,SAAmB,EAAU,EAAE;IACpG,IAAI,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC;QAAE,OAAO,OAAO,CAAC;IAChD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,IAAI,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC;YAAE,OAAO,QAAQ,CAAC;IACpD,CAAC;IACD,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,CAC9B,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,OAAO,GAAG,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CACrE,CAAC;IACF,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC;IAC5C,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,OAAO,GAAG,CAAC,CAAC,CAAC;QACzE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC;QAC5C,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC;IACpB,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,2CAA2C,OAAO,iBAAiB,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AAC7G,CAAC,CAAC;AAEF,MAAM,OAAO,eAAe;IAM1B,YAAoB,OAAiC;QACnD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,UAA0B,EAAE;QAC9C,IAAI,CAAC,OAAO,CAAC,UAAU,KAAK,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC,cAAc,KAAK,SAAS,CAAC,EAAE,CAAC;YAClF,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;QAC7E,CAAC;QACD,MAAM,IAAI,GAA6B;YACrC,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,iBAAiB;YAC/C,UAAU,EAAE,OAAO,CAAC,UAAU,IAAI,mBAAmB;YACrD,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,wBAAwB;YAClE,WAAW,EAAE,OAAO,CAAC,WAAW,IAAI,GAAG;YACvC,mFAAmF;YACnF,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,EAAE;SACzC,CAAC;QACF,MAAM,KAAK,GAAG,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,gBAAgB,GAAG,OAAO,CAAC,UAA
U,KAAK,SAAS,IAAI,OAAO,CAAC,cAAc,KAAK,SAAS,CAAC;QAClG,IAAI,gBAAgB,EAAE,CAAC;YACrB,MAAM,CAAC,cAAc,EAAE,kBAAkB,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;gBAC7D,gBAAgB,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,EAAE,kBAAkB,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC1E,gBAAgB,CAAC,MAAM,CAAC,IAAI,CAAC,cAAc,EAAE,EAAE,kBAAkB,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;aAC/E,CAAC,CAAC;YACH,KAAK,CAAC,cAAc,GAAG,cAAc,CAAC;YACtC,KAAK,CAAC,kBAAkB,GAAG,kBAAkB,CAAC;YAC9C,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,CAAC;YACH,MAAM,CAAC,cAAc,EAAE,kBAAkB,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;gBAC7D,gBAAgB,CAAC,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,EAAE,kBAAkB,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC1E,gBAAgB,CAAC,MAAM,CAAC,IAAI,CAAC,cAAc,EAAE,EAAE,kBAAkB,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;aAC/E,CAAC,CAAC;YACH,KAAK,CAAC,cAAc,GAAG,cAAc,CAAC;YACtC,KAAK,CAAC,kBAAkB,GAAG,kBAAkB,CAAC;QAChD,CAAC;QAAC,MAAM,CAAC;YACP,KAAK,CAAC,OAAO,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE;gBAC3D,kBAAkB,EAAE,CAAC,MAAM,CAAC;aAC7B,CAAC,CAAC;QACL,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,IAAY,EAAE,UAAiC,EAAE;QAC7D,MAAM,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,MAAM,CAAC;QACxD,MAAM,eAAe,GAAG,OAAO,CAAC,eAAe,IAAI,GAAG,CAAC;QACvD,MAAM,QAAQ,GAAG,aAAa,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACrD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACrC,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE,CAC7B,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,eAAe,CAAC,IAAI,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC,CAC/E,CACF,CAAC;QAEF,MAAM,QAAQ,GAAa,EAAE,CAAC;QAC9B,MAAM,UAAU,GAA4B,EAAE,CAAC;QAC/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,IAAI,CAAC,GAAG,CAAC;gBAAE,QAAQ,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;YAC1C,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC9B,KAAK,MAAM,SAAS,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC,UAAU,EAAE,CAAC;gBAC9C,UAAU,CAAC,IAAI,CAAC;oBACd,OAAO,EAAE,SAAS,CAAC,OAAO;oBAC1B,YAAY,EAAE,UAAU,CAAC,MAAM;oBAC/B,SAAS,EAAE,SAAS,CAAC,SAAS;iBAC/B,CAAC,CAAC;YACL,CAAC
;QACH,CAAC;QACD,OAAO,EAAE,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC;IAChD,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,IAAY,EAAE,aAAqB;QAC7D,IAAI,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,kBAAkB,EAAE,CAAC;YACnD,OAAO,IAAI,CAAC,kBAAkB,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;QACtD,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,6BAA6B,CAAC,CAAC;QACjD,CAAC;QACD,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,IAAI,aAAa,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QAEtE,MAAM,KAAK,GAA2B;YACpC,SAAS,EAAE,IAAI,MAAM,CAAC,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YACvE,aAAa,EAAE,IAAI,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;SACtD,CAAC;QAEF,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAC9C,MAAM,OAAO,GAAG,OAAO,CAAC,WAAW,CAAC,IAAqB,CAAC;QAC1D,MAAM,IAAI,GAAG,OAAO,CAAC,YAAY,CAAC,IAAqB,CAAC;QACxD,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,EAAE,IAAI,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;QACrE,OAAO;YACL,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,UAAU,EAAE,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;gBACrD,OAAO,EAAE,SAAS,CAAC,OAAO;gBAC1B,YAAY,EAAE,GAAG;gBACjB,SAAS,EACP,SAAS,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,GAAG,aAAa;aACtF,CAAC,CAAC;SACJ,CAAC;IACJ,CAAC;IAEO,KAAK,CAAC,kBAAkB,CAAC,IAAY,EAAE,aAAqB;QAClE,IAAI,CAAC,IAAI,CAAC,cAAc,IAAI,CAAC,IAAI,CAAC,kBAAkB,EAAE,CAAC;YACrD,MAAM,IAAI,KAAK,CAAC,yCAAyC,CAAC,CAAC;QAC7D,CAAC;QAED,MAAM,OAAO,GAAG,UAAU,CAAC,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;QAC3D,MAAM,QAAQ,GAAG,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,IAAI,aAAa,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;QACtE,MAAM,YAAY,GAA2B;YAC3C,SAAS,EAAE,IAAI,MAAM,CAAC,OAAO,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;YACvE,aAAa,EAAE,IAAI,MAAM,CAAC,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC;SACtD,CAAC;QAEF,MAAM,cA
Ac,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;QACnE,MAAM,kBAAkB,GAAG,IAAI,CAAC,cAAc,CAAC,WAAW,CAAC;QAC3D,MAAM,iBAAiB,GAAG,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC;QAC7D,MAAM,kBAAkB,GAAG,IAAI,CAAC,kBAAkB,CAAC,WAAW,CAAC;QAC/D,MAAM,QAAQ,GAAG;YACf,eAAe,EAAE,WAAW,CAAC,kBAAkB,EAAE,iBAAiB,CAAC;YACnE,cAAc,EAAE,WAAW,CAAC,kBAAkB,EAAE,gBAAgB,CAAC;YACjE,YAAY,EAAE,WAAW,CAAC,kBAAkB,EAAE,cAAc,CAAC;YAC7D,MAAM,EAAE,WAAW,CAAC,kBAAkB,EAAE,QAAQ,CAAC;YACjD,SAAS,EAAE,WAAW,CAAC,kBAAkB,EAAE,WAAW,CAAC;SACxD,CAAC;QACF,MAAM,KAAK,GAAG;YACZ,iBAAiB,EAAE,WAAW,CAAC,iBAAiB,EAAE,mBAAmB,CAAC;YACtE,eAAe,EAAE,WAAW,CAAC,iBAAiB,EAAE,iBAAiB,CAAC;YAClE,cAAc,EAAE,WAAW,CAAC,iBAAiB,EAAE,gBAAgB,CAAC;YAChE,YAAY,EAAE,WAAW,CAAC,iBAAiB,EAAE,cAAc,CAAC;YAC5D,SAAS,EAAE,WAAW,CAAC,iBAAiB,EAAE,WAAW,EAAE,cAAc,CAAC;YACtE,MAAM,EAAE,WAAW,CAAC,iBAAiB,EAAE,QAAQ,EAAE,WAAW,CAAC;YAC7D,SAAS,EAAE,WAAW,CAAC,iBAAiB,EAAE,WAAW,CAAC;SACvD,CAAC;QACF,MAAM,MAAM,GAAG;YACb,cAAc,EAAE,WAAW,CAAC,kBAAkB,EAAE,gBAAgB,CAAC;YACjE,MAAM,EAAE,WAAW,CAAC,kBAAkB,EAAE,YAAY,EAAE,QAAQ,CAAC;YAC/D,SAAS,EAAE,WAAW,CAAC,kBAAkB,EAAE,eAAe,EAAE,WAAW,CAAC;YACxE,WAAW,EAAE,WAAW,CAAC,kBAAkB,EAAE,aAAa,CAAC;SAC5D,CAAC;QACF,MAAM,aAAa,GAAG;YACpB,eAAe,EAAE,cAAc,CAAC,QAAQ,CAAC,eAAe,CAAW;YACnE,cAAc,EAAE,cAAc,CAAC,QAAQ,CAAC,cAAc,CAAW;YACjE,YAAY,EAAE,cAAc,CAAC,QAAQ,CAAC,YAAY,CAAW;YAC7D,MAAM,EAAE,cAAc,CAAC,QAAQ,CAAC,MAAM,CAAW;YACjD,SAAS,EAAE,cAAc,CAAC,QAAQ,CAAC,SAAS,CAAW;SACxD,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,CAAC,aAAa,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAClE,MAAM,SAAS,GAAG,IAAI,YAAY,CAAC,MAAM,CAAC,CAAC;QAC3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE;YAAE,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAClD,MAAM,eAAe,GAAG,IAAI,MAAM,CAAC,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;QAEtE,IAAI,YAAY,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,IAAI,aAAa,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC5F,IAAI,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC;QAClC,IAAI,QAAQ,GAAG,aAAa,CAAC,SAAS,CAAC;QAEvC,MAAM,OAAO,GAAa,EAAE,CAAC;QAC7B,MA
AM,WAAW,GAAa,EAAE,CAAC;QAEjC,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;YAC5D,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC;gBACpD,CAAC,KAAK,CAAC,iBAAiB,CAAC,EAAE,YAAY;gBACvC,CAAC,KAAK,CAAC,eAAe,CAAC,EAAE,aAAa,CAAC,eAAe;gBACtD,CAAC,KAAK,CAAC,cAAc,CAAC,EAAE,aAAa,CAAC,cAAc;gBACpD,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,aAAa,CAAC,YAAY;gBAChD,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,QAAQ;gBAC3B,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,MAAM;gBACtB,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,eAAe;aACnC,CAAC,CAAC;YAEH,MAAM,SAAS,GAAG,UAAU,CAAC,WAAW,CAAC,MAAM,CAAC,cAAc,CAAW,CAAC,CAAC;YAC3E,MAAM,OAAO,GAAG,UAAU,CAAC,WAAW,CAAC,MAAM,CAAC,WAAW,CAAW,CAAC,CAAC;YACtE,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACxB,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAE1B,MAAM,GAAG,WAAW,CAAC,MAAM,CAAC,MAAM,CAAW,CAAC;YAC9C,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,SAAS,CAAW,CAAC;YACnD,YAAY,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,IAAI,aAAa,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAE3E,IAAI,SAAS,KAAK,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBACzC,MAAM;YACR,CAAC;QACH,CAAC;QAED,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,EAAE,WAAW,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;QAC5E,OAAO;YACL,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,UAAU,EAAE,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;gBACrD,OAAO,EAAE,SAAS,CAAC,OAAO;gBAC1B,YAAY,EAAE,GAAG;gBACjB,SAAS,EACP,SAAS,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,GAAG,aAAa;aACtF,CAAC,CAAC;SACJ,CAAC;IACJ,CAAC;CACF;AAED,MAAM,UAAU,GAAG,CAAC,MAAc,EAAU,EAAE;IAC5C,MAAM,IAAI,GAAG,MAAM,CAAC,IAAkC,CAAC;IACvD,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACtB,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;AAC/E,CAAC,CAAC;AAEF,MAAM,WAAW,GAAG,CAAC,KAAa,EAAU,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;AAE5F,MAAM,aAAa,GAAG,CAAC,SAA0B,EAAU,EAAE;IAC3D,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QAClC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAA
C;QAChE,CAAC;QACD,OAAO,IAAI,MAAM,CAAC,WAAW,CAAC,SAAS,CAAC,EAAE,IAAI,CAAC,CAAC;IAClD,CAAC;IACD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,KAAK,GAAG,CAAC;IACtF,OAAO,IAAI,MAAM,CAAC,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,KAAK,GAAG,CAAC,CAAC;AACjF,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,CACpB,IAAY,EACZ,SAAiC,EACe,EAAE;IAClD,IAAI,SAAS,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC,EAAE,CAAC,CAAC;IACtC,CAAC;IACD,MAAM,KAAK,GAAG,aAAa,CAAC,SAAS,CAAC,CAAC;IACvC,IAAI,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAC;IACnE,CAAC;IACD,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC;IAEpB,MAAM,QAAQ,GAAmD,EAAE,CAAC;IACpE,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACzC,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,CAAC,CAAC;QAC7B,IAAI,GAAG,GAAG,KAAK,EAAE,CAAC;YAChB,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,EAAE,aAAa,EAAE,KAAK,EAAE,CAAC,CAAC;QACxE,CAAC;QACD,KAAK,GAAG,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAChC,CAAC;IACD,IAAI,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACxB,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,aAAa,EAAE,KAAK,EAAE,CAAC,CAAC;IACnE,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,CAAC,IAAY,EAAE,cAAsB,EAAU,EAAE;IACvE,IAAI,cAAc,GAAG,CAAC,CAAC;IACvB,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO,CAAC,GAAG,cAAc,EAAE,CAAC;QAC1B,MAAM,EAAE,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACpC,CAAC,IAAI,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACzB,cAAc,IAAI,CAAC,CAAC;IACtB,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC"}
@@ -13,27 +13,26 @@ const WHITESPACE_REGEX = /\s/;
13
13
  export const splitTextToJamo = (text) => {
14
14
  const tokens = [];
15
15
  const mapping = [];
16
- const normalized = text.toLocaleLowerCase("und");
17
- let index = 0;
18
- while (index < normalized.length) {
19
- const code = normalized.codePointAt(index);
20
- const char = String.fromCodePoint(code);
21
- const charLen = char.length;
22
- if (WHITESPACE_REGEX.test(char)) {
23
- index += charLen;
16
+ let offset = 0;
17
+ let charIndex = 0;
18
+ while (offset < text.length) {
19
+ const code = text.codePointAt(offset);
20
+ const ch = String.fromCodePoint(code);
21
+ const charLen = ch.length;
22
+ offset += charLen;
23
+ if (WHITESPACE_REGEX.test(ch)) {
24
+ charIndex += 1;
24
25
  continue;
25
26
  }
26
- while (index < normalized.length) {
27
- const innerCode = normalized.codePointAt(index);
27
+ const normalizedPart = ch.toLocaleLowerCase("und");
28
+ let normalizedOffset = 0;
29
+ while (normalizedOffset < normalizedPart.length) {
30
+ const innerCode = normalizedPart.codePointAt(normalizedOffset);
28
31
  const innerChar = String.fromCodePoint(innerCode);
29
- const innerLen = innerChar.length;
30
- if (WHITESPACE_REGEX.test(innerChar)) {
31
- break;
32
- }
32
+ normalizedOffset += innerChar.length;
33
33
  if (!isHangulSyllable(innerCode)) {
34
34
  tokens.push(innerChar);
35
- mapping.push(index);
36
- index += innerLen;
35
+ mapping.push(charIndex);
37
36
  continue;
38
37
  }
39
38
  const syllableIndex = innerCode - S_BASE;
@@ -41,15 +40,15 @@ export const splitTextToJamo = (text) => {
41
40
  const v = Math.floor((syllableIndex % N_COUNT) / T_COUNT);
42
41
  const t = syllableIndex % T_COUNT;
43
42
  tokens.push(String.fromCodePoint(L_BASE + l));
44
- mapping.push(index);
43
+ mapping.push(charIndex);
45
44
  tokens.push(String.fromCodePoint(V_BASE + v));
46
- mapping.push(index);
45
+ mapping.push(charIndex);
47
46
  if (t !== 0) {
48
47
  tokens.push(String.fromCodePoint(T_BASE + t));
49
- mapping.push(index);
48
+ mapping.push(charIndex);
50
49
  }
51
- index += innerLen;
52
50
  }
51
+ charIndex += 1;
53
52
  }
54
53
  return { tokens, originalIndices: mapping };
55
54
  };
@@ -1 +1 @@
1
- {"version":3,"file":"jamo.js","sourceRoot":"","sources":["../../src/jamo.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,GAAG,MAAM,CAAC;AACtB,MAAM,MAAM,GAAG,MAAM,CAAC;AACtB,MAAM,MAAM,GAAG,MAAM,CAAC;AACtB,MAAM,MAAM,GAAG,MAAM,CAAC;AACtB,MAAM,OAAO,GAAG,EAAE,CAAC;AACnB,MAAM,OAAO,GAAG,EAAE,CAAC;AACnB,MAAM,OAAO,GAAG,EAAE,CAAC;AACnB,MAAM,OAAO,GAAG,OAAO,GAAG,OAAO,CAAC;AAClC,MAAM,OAAO,GAAG,OAAO,GAAG,OAAO,CAAC;AAClC,MAAM,cAAc,GAAG,MAAM,CAAC;AAO9B,MAAM,gBAAgB,GAAG,CAAC,IAAY,EAAE,EAAE,CACxC,IAAI,IAAI,MAAM,IAAI,IAAI,GAAG,MAAM,GAAG,OAAO,CAAC;AAE5C,MAAM,gBAAgB,GAAG,IAAI,CAAC;AAE9B,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,IAAY,EAAgB,EAAE;IAC5D,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;IACjD,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,OAAO,KAAK,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC;QACjC,MAAM,IAAI,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,CAAE,CAAC;QAC5C,MAAM,IAAI,GAAG,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QACxC,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC;QAE5B,IAAI,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAChC,KAAK,IAAI,OAAO,CAAC;YACjB,SAAS;QACX,CAAC;QAED,OAAO,KAAK,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC;YACjC,MAAM,SAAS,GAAG,UAAU,CAAC,WAAW,CAAC,KAAK,CAAE,CAAC;YACjD,MAAM,SAAS,GAAG,MAAM,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;YAClD,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC;YAClC,IAAI,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;gBACrC,MAAM;YACR,CAAC;YAED,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,EAAE,CAAC;gBACjC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBACvB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACpB,KAAK,IAAI,QAAQ,CAAC;gBAClB,SAAS;YACX,CAAC;YAED,MAAM,aAAa,GAAG,SAAS,GAAG,MAAM,CAAC;YACzC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,OAAO,CAAC,CAAC;YAC9C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,aAAa,GAAG,OAAO,CAAC,GAAG,OAAO,CAAC,CAAC;YAC1D,MAAM,CAAC,GAAG,aAAa,GAAG,OAAO,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;YAC9C,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACpB,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;YAC9C,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACpB,IAAI,CAAC
,KAAK,CAAC,EAAE,CAAC;gBACZ,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAC9C,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACtB,CAAC;YACD,KAAK,IAAI,QAAQ,CAAC;QACpB,CAAC;IACH,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,eAAe,EAAE,OAAO,EAAE,CAAC;AAC9C,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,CAAC,OAAe,EAAE,MAAc,EAAE,KAAa,EAAE,EAAE,CACzE,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,OAAO,GAAG,OAAO,GAAG,MAAM,GAAG,OAAO,GAAG,KAAK,CAAC,CAAC;AAE9E,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,MAAgB,EAAU,EAAE;IACzD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,OAAO,GAAkB,IAAI,CAAC;IAClC,IAAI,MAAM,GAAkB,IAAI,CAAC;IACjC,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,MAAM,KAAK,GAAG,GAAG,EAAE;QACjB,IAAI,OAAO,KAAK,IAAI,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;YACxC,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC;QACvD,CAAC;QACD,OAAO,GAAG,IAAI,CAAC;QACf,MAAM,GAAG,IAAI,CAAC;QACd,KAAK,GAAG,CAAC,CAAC;IACZ,CAAC,CAAC;IAEF,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;QACvB,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAAC,CAAC,CAAE,CAAC;QACnC,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC;YAC9C,IAAI,OAAO,KAAK,IAAI,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBACxC,KAAK,EAAE,CAAC;YACV,CAAC;YACD,OAAO,GAAG,IAAI,GAAG,MAAM,CAAC;QAC1B,CAAC;aAAM,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC;YACrD,IAAI,OAAO,KAAK,IAAI;gBAAE,OAAO,GAAG,cAAc,GAAG,MAAM,CAAC;YACxD,IAAI,MAAM,KAAK,IAAI;gBAAE,KAAK,EAAE,CAAC;YAC7B,MAAM,GAAG,IAAI,GAAG,MAAM,CAAC;QACzB,CAAC;aAAM,IAAI,IAAI,GAAG,MAAM,IAAI,IAAI,IAAI,MAAM,GAAG,OAAO,EAAE,CAAC;YACrD,IAAI,OAAO,KAAK,IAAI,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBACxC,KAAK,EAAE,CAAC;gBACR,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,IAAI,GAAG,MAAM,CAAC;gBACtB,KAAK,EAAE,CAAC;YACV,CAAC;QACH,CAAC;aAAM,CAAC;YACN,KAAK,EAAE,CAAC;YACR,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC,CAAC,CAAC;IACH,KAAK,EAAE,CAAC;IACR,OAAO,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACzB,CAAC,CAAC"}
1
+ {"version":3,"file":"jamo.js","sourceRoot":"","sources":["../../src/jamo.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,GAAG,MAAM,CAAC;AACtB,MAAM,MAAM,GAAG,MAAM,CAAC;AACtB,MAAM,MAAM,GAAG,MAAM,CAAC;AACtB,MAAM,MAAM,GAAG,MAAM,CAAC;AACtB,MAAM,OAAO,GAAG,EAAE,CAAC;AACnB,MAAM,OAAO,GAAG,EAAE,CAAC;AACnB,MAAM,OAAO,GAAG,EAAE,CAAC;AACnB,MAAM,OAAO,GAAG,OAAO,GAAG,OAAO,CAAC;AAClC,MAAM,OAAO,GAAG,OAAO,GAAG,OAAO,CAAC;AAClC,MAAM,cAAc,GAAG,MAAM,CAAC;AAO9B,MAAM,gBAAgB,GAAG,CAAC,IAAY,EAAE,EAAE,CACxC,IAAI,IAAI,MAAM,IAAI,IAAI,GAAG,MAAM,GAAG,OAAO,CAAC;AAE5C,MAAM,gBAAgB,GAAG,IAAI,CAAC;AAE9B,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,IAAY,EAAgB,EAAE;IAC5D,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,OAAO,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAE,CAAC;QACvC,MAAM,EAAE,GAAG,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,OAAO,GAAG,EAAE,CAAC,MAAM,CAAC;QAC1B,MAAM,IAAI,OAAO,CAAC;QAClB,IAAI,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC;YAC9B,SAAS,IAAI,CAAC,CAAC;YACf,SAAS;QACX,CAAC;QACD,MAAM,cAAc,GAAG,EAAE,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;QACnD,IAAI,gBAAgB,GAAG,CAAC,CAAC;QACzB,OAAO,gBAAgB,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC;YAChD,MAAM,SAAS,GAAG,cAAc,CAAC,WAAW,CAAC,gBAAgB,CAAE,CAAC;YAChE,MAAM,SAAS,GAAG,MAAM,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;YAClD,gBAAgB,IAAI,SAAS,CAAC,MAAM,CAAC;YAErC,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,EAAE,CAAC;gBACjC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBACvB,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBACxB,SAAS;YACX,CAAC;YAED,MAAM,aAAa,GAAG,SAAS,GAAG,MAAM,CAAC;YACzC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,GAAG,OAAO,CAAC,CAAC;YAC9C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,aAAa,GAAG,OAAO,CAAC,GAAG,OAAO,CAAC,CAAC;YAC1D,MAAM,CAAC,GAAG,aAAa,GAAG,OAAO,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;YAC9C,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACxB,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;YAC9C,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACxB,IAAI,CAAC,KAAK,CAAC,
EAAE,CAAC;gBACZ,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAC9C,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QACD,SAAS,IAAI,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,eAAe,EAAE,OAAO,EAAE,CAAC;AAC9C,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,CAAC,OAAe,EAAE,MAAc,EAAE,KAAa,EAAE,EAAE,CACzE,MAAM,CAAC,aAAa,CAAC,MAAM,GAAG,OAAO,GAAG,OAAO,GAAG,MAAM,GAAG,OAAO,GAAG,KAAK,CAAC,CAAC;AAE9E,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,MAAgB,EAAU,EAAE;IACzD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,OAAO,GAAkB,IAAI,CAAC;IAClC,IAAI,MAAM,GAAkB,IAAI,CAAC;IACjC,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,MAAM,KAAK,GAAG,GAAG,EAAE;QACjB,IAAI,OAAO,KAAK,IAAI,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;YACxC,MAAM,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC;QACvD,CAAC;QACD,OAAO,GAAG,IAAI,CAAC;QACf,MAAM,GAAG,IAAI,CAAC;QACd,KAAK,GAAG,CAAC,CAAC;IACZ,CAAC,CAAC;IAEF,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,EAAE;QACvB,MAAM,IAAI,GAAG,KAAK,CAAC,WAAW,CAAC,CAAC,CAAE,CAAC;QACnC,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC;YAC9C,IAAI,OAAO,KAAK,IAAI,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBACxC,KAAK,EAAE,CAAC;YACV,CAAC;YACD,OAAO,GAAG,IAAI,GAAG,MAAM,CAAC;QAC1B,CAAC;aAAM,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,GAAG,MAAM,GAAG,OAAO,EAAE,CAAC;YACrD,IAAI,OAAO,KAAK,IAAI;gBAAE,OAAO,GAAG,cAAc,GAAG,MAAM,CAAC;YACxD,IAAI,MAAM,KAAK,IAAI;gBAAE,KAAK,EAAE,CAAC;YAC7B,MAAM,GAAG,IAAI,GAAG,MAAM,CAAC;QACzB,CAAC;aAAM,IAAI,IAAI,GAAG,MAAM,IAAI,IAAI,IAAI,MAAM,GAAG,OAAO,EAAE,CAAC;YACrD,IAAI,OAAO,KAAK,IAAI,IAAI,MAAM,KAAK,IAAI,EAAE,CAAC;gBACxC,KAAK,EAAE,CAAC;gBACR,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACrB,CAAC;iBAAM,CAAC;gBACN,KAAK,GAAG,IAAI,GAAG,MAAM,CAAC;gBACtB,KAAK,EAAE,CAAC;YACV,CAAC;QACH,CAAC;aAAM,CAAC;YACN,KAAK,EAAE,CAAC;YACR,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC,CAAC,CAAC;IACH,KAAK,EAAE,CAAC;IACR,OAAO,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACzB,CAAC,CAAC"}
@@ -6,23 +6,26 @@ const decoderTokenToId = new Map(VOCAB.decoder.map((token, idx) => [token, idx])
6
6
  export const encodeText = (text, maxInputLen) => {
7
7
  const jamoSeq = splitTextToJamo(text);
8
8
  const tokens = jamoSeq.tokens.length ? jamoSeq.tokens : ["<unk>"];
9
+ const indices = jamoSeq.originalIndices.length ? jamoSeq.originalIndices : [-1];
9
10
  const ids = tokens.map((token) => encoderTokenToId.get(token) ?? encoderTokenToId.get("<unk>"));
10
11
  const length = Math.min(ids.length, maxInputLen);
11
12
  const padded = new Array(maxInputLen).fill(BigInt(encoderTokenToId.get("<pad>")));
12
13
  for (let i = 0; i < length; i++) {
13
14
  padded[i] = BigInt(ids[i]);
14
15
  }
15
- const positionMap = jamoSeq.originalIndices.slice(0, length);
16
- return { ids: padded, length, positionMap: positionMap.length ? positionMap : [0] };
16
+ const positionMap = indices.slice(0, length);
17
+ return { ids: padded, length, positionMap: positionMap.length ? positionMap : [-1] };
17
18
  };
18
19
  export const decoderIds = {
19
20
  pad: decoderTokenToId.get("<pad>"),
20
21
  sos: decoderTokenToId.get("<sos>"),
21
22
  eos: decoderTokenToId.get("<eos>"),
23
+ unk: decoderTokenToId.get("<unk>"),
22
24
  };
23
25
  export const decodeIdsToResult = (ids, attnIndices, positionMap) => {
24
26
  const phonemes = [];
25
27
  const alignments = [];
28
+ let outOfRangeTokenCount = 0;
26
29
  for (let i = 0; i < ids.length; i++) {
27
30
  const tokenId = Number(ids[i]);
28
31
  if (tokenId === decoderIds.eos)
@@ -31,15 +34,21 @@ export const decodeIdsToResult = (ids, attnIndices, positionMap) => {
31
34
  continue;
32
35
  if (tokenId === decoderIds.sos && phonemes.length === 0)
33
36
  continue;
34
- const phoneme = VOCAB.decoder[tokenId] ?? "";
37
+ const phoneme = VOCAB.decoder[tokenId];
38
+ if (phoneme === undefined) {
39
+ outOfRangeTokenCount += 1;
40
+ }
35
41
  const srcPos = Math.max(0, Math.min(Number(attnIndices[i] ?? 0), positionMap.length > 0 ? positionMap.length - 1 : 0));
36
- const charIndex = positionMap.length > 0 ? positionMap[srcPos] : 0;
42
+ const charIndex = positionMap.length > 0 ? positionMap[srcPos] : -1;
37
43
  alignments.push({
38
- phoneme,
44
+ phoneme: phoneme ?? VOCAB.decoder[decoderIds.unk],
39
45
  phonemeIndex: alignments.length,
40
46
  charIndex,
41
47
  });
42
- phonemes.push(phoneme);
48
+ phonemes.push(phoneme ?? VOCAB.decoder[decoderIds.unk]);
49
+ }
50
+ if (outOfRangeTokenCount > 0 && typeof console !== "undefined") {
51
+ console.warn(`[hama-js] decodeIdsToResult saw ${outOfRangeTokenCount} out-of-range decoder ids; mapped to <unk>.`);
43
52
  }
44
53
  return { ipa: phonemes.join(""), alignments };
45
54
  };
@@ -1 +1 @@
1
- {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/tokenizer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAgB,MAAM,WAAW,CAAC;AAC1D,OAAO,SAAS,MAAM,yBAAyB,CAAC;AAOhD,MAAM,CAAC,MAAM,KAAK,GAAe,SAAuB,CAAC;AAEzD,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAC9B,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,CAAU,CAAC,CACzD,CAAC;AAEF,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAC9B,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,CAAU,CAAC,CACzD,CAAC;AAmBF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,IAAY,EACZ,WAAmB,EACN,EAAE;IACf,MAAM,OAAO,GAAiB,eAAe,CAAC,IAAI,CAAC,CAAC;IACpD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;IAClE,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CACpB,CAAC,KAAK,EAAE,EAAE,CAAC,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE,CACzE,CAAC;IACF,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;IACjD,MAAM,MAAM,GAAG,IAAI,KAAK,CAAS,WAAW,CAAC,CAAC,IAAI,CAChD,MAAM,CAAC,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE,CAAC,CACvC,CAAC;IACF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC;IACD,MAAM,WAAW,GAAG,OAAO,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;IAC7D,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AACtF,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG;IACxB,GAAG,EAAE,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE;IACnC,GAAG,EAAE,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE;IACnC,GAAG,EAAE,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE;CACpC,CAAC;AAEF,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAC/B,GAA+B,EAC/B,WAAuC,EACvC,WAAqB,EACV,EAAE;IACb,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,UAAU,GAAgB,EAAE,CAAC;IAEnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,OAAO,KAAK,UAAU,CAAC,GAAG;YAAE,MAAM;QACtC,IAAI,OAAO,KAAK,UA
AU,CAAC,GAAG;YAAE,SAAS;QACzC,IAAI,OAAO,KAAK,UAAU,CAAC,GAAG,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAElE,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CACrB,CAAC,EACD,IAAI,CAAC,GAAG,CACN,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,EAC3B,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CACpD,CACF,CAAC;QACF,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACnE,UAAU,CAAC,IAAI,CAAC;YACd,OAAO;YACP,YAAY,EAAE,UAAU,CAAC,MAAM;YAC/B,SAAS;SACV,CAAC,CAAC;QACH,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC;AAChD,CAAC,CAAC"}
1
+ {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/tokenizer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAgB,MAAM,WAAW,CAAC;AAC1D,OAAO,SAAS,MAAM,yBAAyB,CAAC;AAOhD,MAAM,CAAC,MAAM,KAAK,GAAe,SAAuB,CAAC;AAEzD,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAC9B,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,CAAU,CAAC,CACzD,CAAC;AAEF,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAC9B,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,CAAU,CAAC,CACzD,CAAC;AAuBF,MAAM,CAAC,MAAM,UAAU,GAAG,CACxB,IAAY,EACZ,WAAmB,EACN,EAAE;IACf,MAAM,OAAO,GAAiB,eAAe,CAAC,IAAI,CAAC,CAAC;IACpD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;IAClE,MAAM,OAAO,GAAG,OAAO,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAChF,MAAM,GAAG,GAAG,MAAM,CAAC,GAAG,CACpB,CAAC,KAAK,EAAE,EAAE,CAAC,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE,CACzE,CAAC;IACF,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;IACjD,MAAM,MAAM,GAAG,IAAI,KAAK,CAAS,WAAW,CAAC,CAAC,IAAI,CAChD,MAAM,CAAC,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE,CAAC,CACvC,CAAC;IACF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC;IACD,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;IAC7C,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;AACvF,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,UAAU,GAAG;IACxB,GAAG,EAAE,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE;IACnC,GAAG,EAAE,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE;IACnC,GAAG,EAAE,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE;IACnC,GAAG,EAAE,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAE;CACpC,CAAC;AAEF,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAC/B,GAA+B,EAC/B,WAAuC,EACvC,WAAqB,EACV,EAAE;IACb,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,UAAU,GAAgB,EAAE,CAAC;IACnC,IAAI,oBAAoB,GAAG,CAAC,CAAC;IAE7B,KAAK,IAAI,CAAC,GAA
G,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,OAAO,KAAK,UAAU,CAAC,GAAG;YAAE,MAAM;QACtC,IAAI,OAAO,KAAK,UAAU,CAAC,GAAG;YAAE,SAAS;QACzC,IAAI,OAAO,KAAK,UAAU,CAAC,GAAG,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAElE,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QACvC,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;YAC1B,oBAAoB,IAAI,CAAC,CAAC;QAC5B,CAAC;QACD,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CACrB,CAAC,EACD,IAAI,CAAC,GAAG,CACN,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,EAC3B,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CACpD,CACF,CAAC;QACF,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACpE,UAAU,CAAC,IAAI,CAAC;YACd,OAAO,EAAE,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;YACjD,YAAY,EAAE,UAAU,CAAC,MAAM;YAC/B,SAAS;SACV,CAAC,CAAC;QACH,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;IAC1D,CAAC;IAED,IAAI,oBAAoB,GAAG,CAAC,IAAI,OAAO,OAAO,KAAK,WAAW,EAAE,CAAC;QAC/D,OAAO,CAAC,IAAI,CACV,mCAAmC,oBAAoB,6CAA6C,CACrG,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC;AAChD,CAAC,CAAC"}