@gukhanmun/napi 0.1.0-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +674 -0
- package/dist/index.d.ts +574 -0
- package/dist/index.js +272 -0
- package/package.json +60 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,574 @@
|
|
|
1
|
+
//#region ../types/dist/index.d.ts
|
|
2
|
+
//#region index.d.ts
|
|
3
|
+
/**
|
|
4
|
+
* Canonical TypeScript API contract for Gukhanmun.
|
|
5
|
+
*
|
|
6
|
+
* This package contains only TypeScript type declarations and carries no
|
|
7
|
+
* runtime code. Both `@gukhanmun/wasm` and `@gukhanmun/napi` satisfy this
|
|
8
|
+
* contract structurally. All TSDoc lives here as the single source of
|
|
9
|
+
* truth for the JavaScript API.
|
|
10
|
+
*
|
|
11
|
+
* @module
|
|
12
|
+
*/
|
|
13
|
+
/**
|
|
14
|
+
* Named configuration preset that sets orthographic and lexical defaults.
|
|
15
|
+
*
|
|
16
|
+
* - `"ko-kr"` — South Korean orthography: dictionary-driven readings, the
|
|
17
|
+
* initial sound law applied to fallback fragments, per-block homophone
|
|
18
|
+
* disambiguation, and the bundled *Standard Korean Language Dictionary*
|
|
19
|
+
* (標準國語大辭典). Corresponds to Rust `Preset::KoKr`.
|
|
20
|
+
* - `"ko-kp"` — North Korean orthography: no initial sound law (래일,
|
|
21
|
+
* 류행, 녀자), no bundled dictionary. Corresponds to Rust `Preset::KoKp`.
|
|
22
|
+
*
|
|
23
|
+
* Both presets default `rendering` to `"hangul-only"` and `segmentation` to
|
|
24
|
+
* `"lattice"`. Individual options passed to {@link GukhanmunOptions} override
|
|
25
|
+
* the preset.
|
|
26
|
+
*/
|
|
27
|
+
type Preset = "ko-kr" | "ko-kp";
|
|
28
|
+
/**
|
|
29
|
+
* Controls how the renderer expands each converted hanja annotation into
|
|
30
|
+
* output text or markup. Corresponds to Rust `RenderMode`.
|
|
31
|
+
*
|
|
32
|
+
* - `"hangul-only"` — Emit only the hangul reading. When `homophone` or
|
|
33
|
+
* `require_hanja` is set on an annotation the reading is followed by the
|
|
34
|
+
* original hanja in parentheses: `한글(漢字)`. Corresponds to Rust
|
|
35
|
+
* `RenderMode::HangulOnly`.
|
|
36
|
+
* - `"hangul-hanja-parens"` — Always emit `한글(漢字)`. Corresponds to
|
|
37
|
+
* Rust `RenderMode::HangulHanjaParens`.
|
|
38
|
+
* - `"hanja-hangul-parens"` — Always emit `漢字(한글)`. Useful for
|
|
39
|
+
* academic and historical-document styles. Corresponds to Rust
|
|
40
|
+
* `RenderMode::HanjaHangulParens`.
|
|
41
|
+
* - `"ruby-on-hangul"` — Emit `<ruby>한글<rt>漢字</rt></ruby>`. Falls
|
|
42
|
+
* back to parentheses when the current scope does not permit inline markup
|
|
43
|
+
* (e.g., inside `<pre>`). Corresponds to Rust
|
|
44
|
+
* `RenderMode::Ruby(RubyBase::OnHangul)`.
|
|
45
|
+
* - `"ruby-on-hanja"` — Emit `<ruby>漢字<rt>한글</rt></ruby>`.
|
|
46
|
+
* Corresponds to Rust `RenderMode::Ruby(RubyBase::OnHanja)`.
|
|
47
|
+
* - `"original"` — Keep the original mixed-script form; only annotations
|
|
48
|
+
* with `require_hangul` or a user directive receive a hangul gloss, which
|
|
49
|
+
* appears either in parentheses or as a ruby element depending on
|
|
50
|
+
* {@link GukhanmunOptions.originalGloss}. Corresponds to Rust
|
|
51
|
+
* `RenderMode::Original`.
|
|
52
|
+
*/
|
|
53
|
+
type RenderMode = "hangul-only" | "hangul-hanja-parens" | "hanja-hangul-parens" | "ruby-on-hangul" | "ruby-on-hanja" | "original";
|
|
54
|
+
/**
|
|
55
|
+
* Selects how glosses are rendered when {@link RenderMode} is `"original"`.
|
|
56
|
+
*
|
|
57
|
+
* - `"parens"` — Wrap the gloss in parentheses: `漢字(한글)` (default).
|
|
58
|
+
* Corresponds to Rust `OriginalGloss::Parens`.
|
|
59
|
+
* - `"ruby"` — Wrap the gloss in a `<ruby>` element. Falls back to
|
|
60
|
+
* parentheses in scopes that do not permit inline markup. Corresponds to
|
|
61
|
+
* Rust `OriginalGloss::Ruby`.
|
|
62
|
+
*
|
|
63
|
+
* This option is ignored when `rendering` is not `"original"`.
|
|
64
|
+
*/
|
|
65
|
+
type OriginalGloss = "parens" | "ruby";
|
|
66
|
+
/**
|
|
67
|
+
* Controls how the engine segments a hanja-containing span into dictionary
|
|
68
|
+
* words and fallback fragments. Corresponds to Rust `SegmentationStrategy`.
|
|
69
|
+
*
|
|
70
|
+
* - `"lattice"` — Dynamic programming over all possible dictionary matches
|
|
71
|
+
* at each position; selects the segmentation that maximises dictionary
|
|
72
|
+
* coverage and then prefers fewer segments. This is the default and
|
|
73
|
+
* produces better results than greedy approaches when a longer prefix
|
|
74
|
+
* would leave a suffix uncovered by the dictionary. Corresponds to Rust
|
|
75
|
+
* `SegmentationStrategy::Lattice`.
|
|
76
|
+
* - `"eager"` — Left-to-right longest-match (greedy). Lower overhead per
|
|
77
|
+
* span at the cost of occasional mis-segmentation. Corresponds to Rust
|
|
78
|
+
* `SegmentationStrategy::Eager`.
|
|
79
|
+
*/
|
|
80
|
+
type Segmentation = "lattice" | "eager";
|
|
81
|
+
/**
|
|
82
|
+
* Controls how runs of hanja numerals are converted. Corresponds to Rust
|
|
83
|
+
* `NumeralStrategy`.
|
|
84
|
+
*
|
|
85
|
+
* | Strategy | `二〇一六年` | `十一月` | `一千二百三十四` |
|
|
86
|
+
* | --------------------- | ------------ | -------- | ---------------- |
|
|
87
|
+
* | `"hangul-phonetic"` | 이공일륙년 | 십일월 | 일천이백삼십사 |
|
|
88
|
+
* | `"positional-arabic"` | 2016년 | (n/a) | (n/a) |
|
|
89
|
+
* | `"additive-arabic"` | (n/a) | 11월 | 1234 |
|
|
90
|
+
* | `"smart"` | 2016년 | 11월 | 1234 |
|
|
91
|
+
*
|
|
92
|
+
* - `"hangul-phonetic"` — Read every digit character-by-character in
|
|
93
|
+
* Korean phonetics. This is Seonbi's behaviour and the preset default.
|
|
94
|
+
* Corresponds to Rust `NumeralStrategy::HangulPhonetic`.
|
|
95
|
+
* - `"positional-arabic"` — Treat a run of digit-only hanja
|
|
96
|
+
* (`〇一二三四五六七八九` and variants) as positional (place-value)
|
|
97
|
+
* notation and convert to Arabic. Corresponds to Rust
|
|
98
|
+
* `NumeralStrategy::PositionalArabic`.
|
|
99
|
+
* - `"additive-arabic"` — Parse sequences containing place markers
|
|
100
|
+
* (`十百千萬億兆京`) using stack-based accumulation and produce Arabic,
|
|
101
|
+
* respecting the Korean convention that bare `十` means 10 not `一十`.
|
|
102
|
+
* Corresponds to Rust `NumeralStrategy::AdditiveArabic`.
|
|
103
|
+
* - `"smart"` — Uses `"additive-arabic"` when a unit hanja follows the
|
|
104
|
+
* numeral (`年月日時分秒號世紀` and others); uses `"positional-arabic"`
|
|
105
|
+
* for pure-digit runs of four or more characters (year convention);
|
|
106
|
+
* otherwise falls back to `"hangul-phonetic"`. Corresponds to Rust
|
|
107
|
+
* `NumeralStrategy::Smart`.
|
|
108
|
+
*/
|
|
109
|
+
type NumeralStrategy = "hangul-phonetic" | "positional-arabic" | "additive-arabic" | "smart";
|
|
110
|
+
/**
|
|
111
|
+
* Defines the scope within which the homophone marker and first-occurrence
|
|
112
|
+
* filter track previously seen readings. Corresponds to Rust `ContextWindow`.
|
|
113
|
+
*
|
|
114
|
+
* - `"off"` — Disable the corresponding middleware entirely.
|
|
115
|
+
* - `"per-block"` — Reset at each block boundary (paragraph, list item,
|
|
116
|
+
* heading, …). This is the default for homophone marking; first-occurrence
|
|
117
|
+
* filtering defaults to `"off"`. In plain text, which has no block scopes, `per-block`
|
|
118
|
+
* is document-wide.
|
|
119
|
+
* - `"per-section"` — Reset at each heading boundary (HTML `<h1>`–`<h6>`,
|
|
120
|
+
* Markdown ATX/setext headings).
|
|
121
|
+
* - `"per-document"` — Track across the entire document. This buffers the
|
|
122
|
+
* entire token stream and is appropriate only for small inputs or when full
|
|
123
|
+
* accuracy matters more than latency.
|
|
124
|
+
*/
|
|
125
|
+
type ContextWindow = "off" | "per-block" | "per-section" | "per-document";
|
|
126
|
+
/**
|
|
127
|
+
* Controls how the pipeline handles reader errors encountered during HTML
|
|
128
|
+
* scanning. Corresponds to Rust `Recovery`.
|
|
129
|
+
*
|
|
130
|
+
* - `"strict"` — Propagate the error and stop (default).
|
|
131
|
+
* - `"lenient"` — Log the error via `tracing` and emit a verbatim token for
|
|
132
|
+
* the unrecognised region so that downstream tokens continue to flow.
|
|
133
|
+
*
|
|
134
|
+
* This option is meaningful only for `format: "html"`. Markdown parsing does
|
|
135
|
+
* not produce recoverable errors, so this option is ignored for Markdown input.
|
|
136
|
+
*/
|
|
137
|
+
type Recovery = "strict" | "lenient";
|
|
138
|
+
/**
|
|
139
|
+
* A single dictionary entry returned by a dictionary lookup.
|
|
140
|
+
*/
|
|
141
|
+
interface DictionaryEntry {
|
|
142
|
+
/** The hanja form (key), e.g. `"漢字"`. */
|
|
143
|
+
readonly hanja: string;
|
|
144
|
+
/** The hangul reading, e.g. `"한자"`. */
|
|
145
|
+
readonly reading: string;
|
|
146
|
+
/**
|
|
147
|
+
* When `true`, the renderer should always show the original hanja
|
|
148
|
+
* alongside the hangul reading, regardless of ambiguity.
|
|
149
|
+
*
|
|
150
|
+
* Corresponds to Rust `MatchMark::require_hanja`.
|
|
151
|
+
*/
|
|
152
|
+
readonly requireHanja?: boolean;
|
|
153
|
+
/**
|
|
154
|
+
* When `true`, the renderer should always show a hangul gloss alongside
|
|
155
|
+
* the original hanja (used with `rendering: "original"`).
|
|
156
|
+
*
|
|
157
|
+
* Corresponds to Rust `MatchMark::require_hangul`.
|
|
158
|
+
*/
|
|
159
|
+
readonly requireHangul?: boolean;
|
|
160
|
+
}
|
|
161
|
+
/**
|
|
162
|
+
* Specifies a dictionary loaded from a binary file or URL.
|
|
163
|
+
*
|
|
164
|
+
* The `data` field accepts:
|
|
165
|
+
* - A `BufferSource` (`ArrayBuffer` or `ArrayBufferView`) — supported in all
|
|
166
|
+
* environments.
|
|
167
|
+
* - A `URL` — resolved via `fetch` in browsers; via `node:fs/promises`
|
|
168
|
+
* in Node.js, Deno 2.0+, and Bun.
|
|
169
|
+
* - A `string` — treated as a filesystem path; supported in Node.js,
|
|
170
|
+
* Deno 2.0+, and Bun only. Throws in browser environments.
|
|
171
|
+
*
|
|
172
|
+
* At runtime, a `FileDictionarySource` is distinguished from other values by
|
|
173
|
+
* the presence of a `format` property (`"format" in source`).
|
|
174
|
+
*/
|
|
175
|
+
interface FileDictionarySource {
|
|
176
|
+
/**
|
|
177
|
+
* The binary dictionary data or a reference to where it can be loaded.
|
|
178
|
+
*
|
|
179
|
+
* Pass a `BufferSource` for data already in memory, a `URL` for a remote
|
|
180
|
+
* or local URL (resolved via `fetch` or `readFile`), or a path `string`
|
|
181
|
+
* for filesystem paths (Node.js / Deno 2.0+ / Bun only).
|
|
182
|
+
*/
|
|
183
|
+
readonly data: ArrayBuffer | ArrayBufferView | URL | string;
|
|
184
|
+
/**
|
|
185
|
+
* The on-disk format of the dictionary file.
|
|
186
|
+
*
|
|
187
|
+
* - `"fst"` — Gukhanmun FST file (`*.gukfst`); preferred for small
|
|
188
|
+
* WebAssembly bundles. Supported in all runtimes.
|
|
189
|
+
* - `"cdb"` — Gukhanmun CDB-trie file (`*.gukcdb`); preferred when code
|
|
190
|
+
* auditability or trivial mmap support matters. Requires a filesystem
|
|
191
|
+
* or in-memory bytes; supported in Node-API and (with `from_bytes`) in
|
|
192
|
+
* WASM builds that include the `cdb` feature.
|
|
193
|
+
*
|
|
194
|
+
* The `"tsv"` format is reserved for future use; passing it throws
|
|
195
|
+
* `GukhanmunError` with code `"unsupported-content-type"`.
|
|
196
|
+
*/
|
|
197
|
+
readonly format: "fst" | "cdb";
|
|
198
|
+
}
|
|
199
|
+
/**
|
|
200
|
+
* A dictionary source accepted by {@link GukhanmunOptions.dictionaries}.
|
|
201
|
+
*
|
|
202
|
+
* Currently only {@link FileDictionarySource} (binary file / URL / path) is
|
|
203
|
+
* supported. Sources are tried in array order; the first match wins.
|
|
204
|
+
*
|
|
205
|
+
* @example
|
|
206
|
+
* ```ts
|
|
207
|
+
* import { stdictFst } from "@gukhanmun/stdict-fst";
|
|
208
|
+
* const g = await load({ dictionaries: [await stdictFst()] });
|
|
209
|
+
* ```
|
|
210
|
+
*/
|
|
211
|
+
type DictionarySource = FileDictionarySource;
|
|
212
|
+
/**
|
|
213
|
+
* Fine-grained HTML preservation rules passed in
|
|
214
|
+
* {@link GukhanmunOptions.html}.
|
|
215
|
+
*
|
|
216
|
+
* These are additive: a scope is preserved when *any* rule matches. They
|
|
217
|
+
* correspond to the CLI flags `--html-preserve-class` and
|
|
218
|
+
* `--html-preserve-attr`, and to the Rust `Builder::html_preserve_when`
|
|
219
|
+
* predicate.
|
|
220
|
+
*/
|
|
221
|
+
interface HtmlOptions {
|
|
222
|
+
/**
|
|
223
|
+
* Class names whose containing element (and all descendants) should be
|
|
224
|
+
* treated as a preserved region — the engine skips their text content.
|
|
225
|
+
*
|
|
226
|
+
* Equivalent to passing `--html-preserve-class NAME` to the CLI one or
|
|
227
|
+
* more times.
|
|
228
|
+
*/
|
|
229
|
+
readonly preserveClasses?: readonly string[];
|
|
230
|
+
/**
|
|
231
|
+
* Attribute matchers; an element is preserved when it carries a matching
|
|
232
|
+
* attribute. Each entry is either:
|
|
233
|
+
* - A bare string — preserve any element that has the attribute, regardless
|
|
234
|
+
* of value (e.g. `"data-no-translate"`).
|
|
235
|
+
* - An object `{ name, value? }` — preserve elements where the attribute
|
|
236
|
+
* equals `value`, or has the attribute when `value` is omitted.
|
|
237
|
+
*
|
|
238
|
+
* Equivalent to `--html-preserve-attr KEY[=VALUE]` on the CLI.
|
|
239
|
+
*/
|
|
240
|
+
readonly preserveAttributes?: readonly (string | {
|
|
241
|
+
readonly name: string;
|
|
242
|
+
readonly value?: string;
|
|
243
|
+
})[];
|
|
244
|
+
}
|
|
245
|
+
/**
|
|
246
|
+
* Per-hanja rendering directives that override the dictionary's own marks.
|
|
247
|
+
*
|
|
248
|
+
* Each list contains hanja forms (exact string matches, e.g. `"漢字"`).
|
|
249
|
+
* JavaScript bindings expose only the literal-set form; glob and predicate
|
|
250
|
+
* variants are available in the Rust API only.
|
|
251
|
+
*
|
|
252
|
+
* Corresponds to Rust `UserDirectives` with `DirectiveAction::RequireHanja`,
|
|
253
|
+
* `DirectiveAction::RequireHangul`, and `DirectiveAction::SkipAnnotation`.
|
|
254
|
+
*/
|
|
255
|
+
interface Directives {
|
|
256
|
+
/**
|
|
257
|
+
* Hanja forms that must always be shown with their original hanja
|
|
258
|
+
* alongside the hangul reading, as if `requireHanja` were set in the
|
|
259
|
+
* dictionary.
|
|
260
|
+
*/
|
|
261
|
+
readonly requireHanja?: readonly string[];
|
|
262
|
+
/**
|
|
263
|
+
* Hanja forms that must always be shown with a hangul gloss alongside the
|
|
264
|
+
* original hanja (relevant for `rendering: "original"`).
|
|
265
|
+
*/
|
|
266
|
+
readonly requireHangul?: readonly string[];
|
|
267
|
+
/**
|
|
268
|
+
* Hanja forms whose annotation should be suppressed entirely; the renderer
|
|
269
|
+
* emits only the primary plain text form (hangul or hanja depending on
|
|
270
|
+
* `rendering`).
|
|
271
|
+
*/
|
|
272
|
+
readonly skipAnnotation?: readonly string[];
|
|
273
|
+
}
|
|
274
|
+
/**
|
|
275
|
+
* Full set of options passed to {@link GukhanmunFactory.load} (or the
|
|
276
|
+
* top-level `load` function) to configure a {@link Gukhanmun} instance.
|
|
277
|
+
*
|
|
278
|
+
* All fields are optional. When a `preset` is specified it supplies
|
|
279
|
+
* defaults; individual fields override those defaults. When no preset is
|
|
280
|
+
* given, `"ko-kr"` is implicitly used.
|
|
281
|
+
*/
|
|
282
|
+
interface GukhanmunOptions {
|
|
283
|
+
/**
|
|
284
|
+
* Named configuration preset. Defaults to `"ko-kr"`.
|
|
285
|
+
*
|
|
286
|
+
* @see {@link Preset}
|
|
287
|
+
*/
|
|
288
|
+
readonly preset?: Preset;
|
|
289
|
+
/**
|
|
290
|
+
* How annotations are rendered into output text or markup. Defaults to
|
|
291
|
+
* `"hangul-only"`.
|
|
292
|
+
*
|
|
293
|
+
* @see {@link RenderMode}
|
|
294
|
+
*/
|
|
295
|
+
readonly rendering?: RenderMode;
|
|
296
|
+
/**
|
|
297
|
+
* How glosses are rendered when `rendering` is `"original"`. Ignored for
|
|
298
|
+
* all other render modes. Defaults to `"parens"`.
|
|
299
|
+
*
|
|
300
|
+
* @see {@link OriginalGloss}
|
|
301
|
+
*/
|
|
302
|
+
readonly originalGloss?: OriginalGloss;
|
|
303
|
+
/**
|
|
304
|
+
* Hanja-span segmentation algorithm. Defaults to `"lattice"`.
|
|
305
|
+
*
|
|
306
|
+
* @see {@link Segmentation}
|
|
307
|
+
*/
|
|
308
|
+
readonly segmentation?: Segmentation;
|
|
309
|
+
/**
|
|
310
|
+
* How runs of hanja numerals are converted. Defaults to
|
|
311
|
+
* `"hangul-phonetic"`.
|
|
312
|
+
*
|
|
313
|
+
* @see {@link NumeralStrategy}
|
|
314
|
+
*/
|
|
315
|
+
readonly numerals?: NumeralStrategy;
|
|
316
|
+
/**
|
|
317
|
+
* Whether to apply the Korean initial sound law (頭音法則) to fallback
|
|
318
|
+
* phonetic readings. Defaults to `true` for `"ko-kr"` and `false` for
|
|
319
|
+
* `"ko-kp"`.
|
|
320
|
+
*
|
|
321
|
+
* Note: dictionary entries are assumed to encode the correct reading
|
|
322
|
+
* already; this flag only affects the character-by-character fallback path.
|
|
323
|
+
*/
|
|
324
|
+
readonly initialSoundLaw?: boolean;
|
|
325
|
+
/**
|
|
326
|
+
* Context window for homophone disambiguation. The `HomophoneMarker`
|
|
327
|
+
* middleware sets `homophone = true` on annotations whose hangul reading is
|
|
328
|
+
* shared by another hanja form within this window. Defaults to
|
|
329
|
+
* `"per-block"`.
|
|
330
|
+
*
|
|
331
|
+
* @see {@link ContextWindow}
|
|
332
|
+
*/
|
|
333
|
+
readonly homophoneWindow?: ContextWindow;
|
|
334
|
+
/**
|
|
335
|
+
* Context window for first-occurrence filtering. The
|
|
336
|
+
* `FirstOccurrenceFilter` middleware clears `requireHanja` /
|
|
337
|
+
* `requireHangul` on repeated occurrences of the same word within this
|
|
338
|
+
* window, so the gloss appears only the first time. Defaults to `"off"`
|
|
339
|
+
* (filter disabled) in both presets.
|
|
340
|
+
*
|
|
341
|
+
* @see {@link ContextWindow}
|
|
342
|
+
*/
|
|
343
|
+
readonly firstOccurrenceWindow?: ContextWindow;
|
|
344
|
+
/**
|
|
345
|
+
* Error recovery policy for HTML scanning. Defaults to `"strict"`.
|
|
346
|
+
* Ignored for non-HTML input formats.
|
|
347
|
+
*
|
|
348
|
+
* @see {@link Recovery}
|
|
349
|
+
*/
|
|
350
|
+
readonly recovery?: Recovery;
|
|
351
|
+
/**
|
|
352
|
+
* Ordered list of dictionary sources. Sources are queried in order;
|
|
353
|
+
* earlier entries take precedence. When omitted (or empty), only the
|
|
354
|
+
* fallback Unihan character map is used (no stdict).
|
|
355
|
+
*
|
|
356
|
+
* Unlike the `"ko-kr"` Rust preset, JavaScript presets do **not**
|
|
357
|
+
* automatically include a bundled dictionary. To use the *Standard Korean
|
|
358
|
+
* Language Dictionary*, add `@gukhanmun/stdict-fst` or
|
|
359
|
+
* `@gukhanmun/stdict-cdb` explicitly.
|
|
360
|
+
*
|
|
361
|
+
* @see {@link DictionarySource}
|
|
362
|
+
*/
|
|
363
|
+
readonly dictionaries?: readonly DictionarySource[];
|
|
364
|
+
/**
|
|
365
|
+
* Per-hanja rendering directives that override dictionary marks.
|
|
366
|
+
*
|
|
367
|
+
* @see {@link Directives}
|
|
368
|
+
*/
|
|
369
|
+
readonly directives?: Directives;
|
|
370
|
+
/**
|
|
371
|
+
* HTML-specific preservation rules. Ignored for non-HTML input formats.
|
|
372
|
+
*
|
|
373
|
+
* @see {@link HtmlOptions}
|
|
374
|
+
*/
|
|
375
|
+
readonly html?: HtmlOptions;
|
|
376
|
+
}
|
|
377
|
+
/**
|
|
378
|
+
* Input / output format for {@link Gukhanmun.convert} and
|
|
379
|
+
* {@link Gukhanmun.stream}.
|
|
380
|
+
*
|
|
381
|
+
* - `"text"` — Plain text (default). No markup interpretation; ruby
|
|
382
|
+
* rendering falls back to parentheses.
|
|
383
|
+
* - `"html"` — HTML fragment. The scanner is fragment-oriented and recovers
|
|
384
|
+
* from minor malformations.
|
|
385
|
+
* - `"markdown"` — CommonMark Markdown (GFM disabled by default).
|
|
386
|
+
* - `{ format: "markdown"; gfm?: boolean }` — Markdown with optional GFM
|
|
387
|
+
* extensions. Set `gfm: true` to enable GitHub Flavored Markdown tables,
|
|
388
|
+
* strikethrough, and task lists.
|
|
389
|
+
*
|
|
390
|
+
* The object form `{ format: "markdown" }` is equivalent to the string
|
|
391
|
+
* `"markdown"`.
|
|
392
|
+
*/
|
|
393
|
+
type Format = "text" | "html" | "markdown" | {
|
|
394
|
+
readonly format: "markdown";
|
|
395
|
+
readonly gfm?: boolean;
|
|
396
|
+
};
|
|
397
|
+
/**
|
|
398
|
+
* A configured hanja-to-hangul converter. Created by calling
|
|
399
|
+
* {@link GukhanmunFactory.load} (or the top-level `load` function).
|
|
400
|
+
*
|
|
401
|
+
* The instance is immutable after creation; call `load` again to obtain a
|
|
402
|
+
* converter with different options.
|
|
403
|
+
*/
|
|
404
|
+
interface Gukhanmun {
|
|
405
|
+
/**
|
|
406
|
+
* Converts `source` to hangul in one shot. Buffers the entire input
|
|
407
|
+
* before returning.
|
|
408
|
+
*
|
|
409
|
+
* @param source - The text to convert.
|
|
410
|
+
* @param format - Input / output format. Defaults to `"text"`.
|
|
411
|
+
* @returns The converted text.
|
|
412
|
+
* @throws {@link GukhanmunError} on conversion failure.
|
|
413
|
+
*/
|
|
414
|
+
convert(source: string, format?: Format): string;
|
|
415
|
+
/**
|
|
416
|
+
* Returns a `TransformStream<string, string>` that converts chunks
|
|
417
|
+
* incrementally. Chunks are JavaScript strings; byte-level encoding is
|
|
418
|
+
* the caller's responsibility (`TextDecoderStream` / `TextEncoderStream`).
|
|
419
|
+
*
|
|
420
|
+
* The stream guarantees that the concatenated output equals the result of
|
|
421
|
+
* calling `convert` on the concatenated input, regardless of chunk
|
|
422
|
+
* boundaries. Document-wide middlewares (e.g., homophone marking with
|
|
423
|
+
* `homophoneWindow: "per-document"`) buffer until the writable side is
|
|
424
|
+
* closed.
|
|
425
|
+
*
|
|
426
|
+
* @param format - Input / output format. Defaults to `"text"`.
|
|
427
|
+
* @returns A platform `TransformStream<string, string>`.
|
|
428
|
+
* @throws {@link GukhanmunError} on initialisation failure (not on chunk
|
|
429
|
+
* errors; those are signalled via the stream's error channel).
|
|
430
|
+
*/
|
|
431
|
+
stream(format?: Format): TransformStream<string, string>;
|
|
432
|
+
/**
|
|
433
|
+
* Read-only view of the resolved options (after preset defaults are
|
|
434
|
+
* applied). Excludes `dictionaries`, `directives`, `html`, and
|
|
435
|
+
* `originalGloss`, which are not meaningfully representable as plain
|
|
436
|
+
* values.
|
|
437
|
+
*/
|
|
438
|
+
readonly options: Readonly<Required<Omit<GukhanmunOptions, "dictionaries" | "directives" | "html" | "originalGloss">>>;
|
|
439
|
+
}
|
|
440
|
+
/**
|
|
441
|
+
* Factory interface satisfied by both `@gukhanmun/wasm` and
|
|
442
|
+
* `@gukhanmun/napi`.
|
|
443
|
+
*
|
|
444
|
+
* @example
|
|
445
|
+
* ```ts
|
|
446
|
+
* import { load } from "@gukhanmun/wasm";
|
|
447
|
+
* import { stdictFst } from "@gukhanmun/stdict-fst";
|
|
448
|
+
*
|
|
449
|
+
* const g = await load({
|
|
450
|
+
* preset: "ko-kr",
|
|
451
|
+
* dictionaries: [await stdictFst()],
|
|
452
|
+
* });
|
|
453
|
+
* console.log(g.convert("漢字를 한글로"));
|
|
454
|
+
* ```
|
|
455
|
+
*/
|
|
456
|
+
interface GukhanmunFactory {
|
|
457
|
+
/**
|
|
458
|
+
* Loads and initialises a {@link Gukhanmun} converter with the given
|
|
459
|
+
* options.
|
|
460
|
+
*
|
|
461
|
+
* In the WASM implementation this involves asynchronous `.wasm` binary
|
|
462
|
+
* initialisation; in the Node-API implementation the native addon is
|
|
463
|
+
* synchronously ready but still returns a `Promise` for API uniformity.
|
|
464
|
+
* Dictionary sources with `URL` or string `data` are fetched / read during
|
|
465
|
+
* this call.
|
|
466
|
+
*
|
|
467
|
+
* @param options - Conversion options. All fields are optional; unset
|
|
468
|
+
* fields inherit defaults from the selected `preset` (or `"ko-kr"` when
|
|
469
|
+
* no preset is given).
|
|
470
|
+
* @returns A ready-to-use {@link Gukhanmun} instance.
|
|
471
|
+
* @throws {@link GukhanmunError} when an option value is unrecognised
|
|
472
|
+
* (`code: "invalid-input"`) or a dictionary fails to load
|
|
473
|
+
* (`code: "dictionary-load"`).
|
|
474
|
+
*/
|
|
475
|
+
load(options?: GukhanmunOptions): Promise<Gukhanmun>;
|
|
476
|
+
}
|
|
477
|
+
/**
|
|
478
|
+
* Top-level entry point exported by both `@gukhanmun/wasm` and
|
|
479
|
+
* `@gukhanmun/napi` as a named export.
|
|
480
|
+
*
|
|
481
|
+
* Equivalent to calling `load(options)` on a {@link GukhanmunFactory}. Declared here so
|
|
482
|
+
* that code that `import { load }` from either implementation package
|
|
483
|
+
* type-checks against the same signature.
|
|
484
|
+
*/
|
|
485
|
+
|
|
486
|
+
/**
|
|
487
|
+
* Discriminant code carried by every {@link GukhanmunError}.
|
|
488
|
+
*
|
|
489
|
+
* - `"dictionary-load"` — A dictionary file could not be opened, read, or
|
|
490
|
+
* decoded.
|
|
491
|
+
* - `"segmentation"` — The lattice segmenter encountered an internal
|
|
492
|
+
* inconsistency.
|
|
493
|
+
* - `"invalid-reading"` — A dictionary entry's hangul reading is not valid
|
|
494
|
+
* hangul.
|
|
495
|
+
* - `"html-scan"` — The HTML scanner encountered an unrecoverable error.
|
|
496
|
+
* - `"html-malformed-attr"` — An HTML attribute string could not be parsed.
|
|
497
|
+
* - `"markdown"` — The Markdown adapter encountered a parsing error.
|
|
498
|
+
* - `"unsupported-content-type"` — An unrecognised format string was passed
|
|
499
|
+
* to `convert` or `stream`.
|
|
500
|
+
* - `"invalid-input"` — An option value is not in the expected set (e.g. an
|
|
501
|
+
* unrecognised preset or render mode string).
|
|
502
|
+
* - `"io"` — An I/O error occurred (file read, network, …).
|
|
503
|
+
* - `"internal"` — An internal invariant was violated; this is a bug.
|
|
504
|
+
* - `"other"` — Any other error not covered by the above codes.
|
|
505
|
+
*/
|
|
506
|
+
type ErrorCode = "dictionary-load" | "segmentation" | "invalid-reading" | "html-scan" | "html-malformed-attr" | "markdown" | "unsupported-content-type" | "invalid-input" | "io" | "internal" | "other";
|
|
507
|
+
/**
|
|
508
|
+
* Error class thrown by all Gukhanmun operations.
|
|
509
|
+
*
|
|
510
|
+
* Declared here as a `declare class` so this package remains purely
|
|
511
|
+
* type-level. The actual class (with identical shape) is provided by each
|
|
512
|
+
* runtime package (`@gukhanmun/wasm` and `@gukhanmun/napi`).
|
|
513
|
+
*
|
|
514
|
+
* The `chain` property exposes the Rust `Error::source()` chain materialised
|
|
515
|
+
* at the FFI boundary, allowing callers to inspect underlying causes without
|
|
516
|
+
* additional FFI calls.
|
|
517
|
+
*/
|
|
518
|
+
//#endregion
|
|
519
|
+
//#region index.d.ts
|
|
520
|
+
/**
|
|
521
|
+
* Error thrown by {@link load}, {@link Gukhanmun.convert}, and
|
|
522
|
+
* {@link Gukhanmun.stream} when the Rust engine reports a failure.
|
|
523
|
+
*
|
|
524
|
+
* `code` identifies the failure class; `chain` carries the full causal chain
|
|
525
|
+
* materialised at the FFI boundary so callers do not need additional round
|
|
526
|
+
* trips.
|
|
527
|
+
*/
|
|
528
|
+
declare class GukhanmunError extends Error {
|
|
529
|
+
/**
|
|
530
|
+
* Machine-readable error code.
|
|
531
|
+
*
|
|
532
|
+
* @see {@link ErrorCode}
|
|
533
|
+
*/
|
|
534
|
+
readonly code: ErrorCode;
|
|
535
|
+
/**
|
|
536
|
+
* Full causal chain from the Rust `Error::source()` traversal, materialised
|
|
537
|
+
* at the FFI boundary. The first element is the root cause; the last is
|
|
538
|
+
* the immediate error.
|
|
539
|
+
*/
|
|
540
|
+
readonly chain: readonly {
|
|
541
|
+
readonly code: ErrorCode;
|
|
542
|
+
readonly message: string;
|
|
543
|
+
}[];
|
|
544
|
+
/**
|
|
545
|
+
* Creates a new `GukhanmunError`.
|
|
546
|
+
*
|
|
547
|
+
* @param code - Machine-readable error code.
|
|
548
|
+
* @param message - Human-readable description.
|
|
549
|
+
* @param chain - Optional causal chain.
|
|
550
|
+
*/
|
|
551
|
+
constructor(code: ErrorCode, message: string, chain?: readonly {
|
|
552
|
+
code: ErrorCode;
|
|
553
|
+
message: string;
|
|
554
|
+
}[]);
|
|
555
|
+
}
|
|
556
|
+
/**
|
|
557
|
+
* Creates a Gukhanmun converter with the given options.
|
|
558
|
+
*
|
|
559
|
+
* The native addon is synchronously ready; dictionaries supplied via
|
|
560
|
+
* {@link GukhanmunOptions.dictionaries} are fetched or read from disk and
|
|
561
|
+
* passed to the Rust engine as `FileDictionarySource` values.
|
|
562
|
+
*
|
|
563
|
+
* Note: unlike the Rust `ko-kr` preset, the JavaScript preset never includes a
|
|
564
|
+
* bundled dictionary. Pass `dictionaries: [await stdictFst()]` to include the
|
|
565
|
+
* Standard Korean Language Dictionary.
|
|
566
|
+
*
|
|
567
|
+
* @param options - Conversion options. All fields are optional; defaults match
|
|
568
|
+
* the `ko-kr` preset.
|
|
569
|
+
* @returns A {@link Gukhanmun} instance.
|
|
570
|
+
* @throws {@link GukhanmunError} on invalid options or dictionary load failure.
|
|
571
|
+
*/
|
|
572
|
+
declare function load(options?: GukhanmunOptions): Promise<Gukhanmun>;
|
|
573
|
+
//#endregion
|
|
574
|
+
export { type ContextWindow, type DictionaryEntry, type DictionarySource, type Directives, type ErrorCode, type FileDictionarySource, type Format, type Gukhanmun, GukhanmunError, type GukhanmunFactory, type GukhanmunOptions, type HtmlOptions, type NumeralStrategy, type OriginalGloss, type Preset, type Recovery, type RenderMode, type Segmentation, load };
|