lindera-wasm-web-ipadic 1.2.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,36 +12,36 @@ WebAssembly of Lindera
12
12
 
13
13
  ### Web
14
14
 
15
- - <https://www.npmjs.com/package/lindera-wasm-web-cjk>
15
+ - <https://www.npmjs.com/package/lindera-wasm-web-cjk>
16
16
  Lindera WASM with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT) for Web
17
17
 
18
- - <https://www.npmjs.com/package/lindera-wasm-web-ipadic>
18
+ - <https://www.npmjs.com/package/lindera-wasm-web-ipadic>
19
19
  Lindera WASM with Japanese dictionary (IPADIC) for Web
20
20
 
21
- - <https://www.npmjs.com/package/lindera-wasm-web-unidic>
21
+ - <https://www.npmjs.com/package/lindera-wasm-web-unidic>
22
22
  Lindera WASM with Japanese dictionary (UniDic) for Web
23
23
 
24
- - <https://www.npmjs.com/package/lindera-wasm-web-ko-dic>
24
+ - <https://www.npmjs.com/package/lindera-wasm-web-ko-dic>
25
25
  Lindera WASM with Korean dictionary (ko-dic) for Web
26
26
 
27
- - <https://www.npmjs.com/package/lindera-wasm-web-cc-cedict>
27
+ - <https://www.npmjs.com/package/lindera-wasm-web-cc-cedict>
28
28
  Lindera WASM with Chinese dictionary (CC-CEDICT) for Web
29
29
 
30
30
  ### Node.js
31
31
 
32
- - <https://www.npmjs.com/package/lindera-wasm-nodejs-cjk>
32
+ - <https://www.npmjs.com/package/lindera-wasm-nodejs-cjk>
33
33
  Lindera WASM with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT) for Node.js
34
34
 
35
- - <https://www.npmjs.com/package/lindera-wasm-nodejs-ipadic>
35
+ - <https://www.npmjs.com/package/lindera-wasm-nodejs-ipadic>
36
36
  Lindera WASM with Japanese dictionary (IPADIC) for Node.js
37
37
 
38
- - <https://www.npmjs.com/package/lindera-wasm-nodejs-unidic>
38
+ - <https://www.npmjs.com/package/lindera-wasm-nodejs-unidic>
39
39
  Lindera WASM with Japanese dictionary (UniDic) for Node.js
40
40
 
41
- - <https://www.npmjs.com/package/lindera-wasm-nodejs-ko-dic>
41
+ - <https://www.npmjs.com/package/lindera-wasm-nodejs-ko-dic>
42
42
  Lindera WASM with Korean dictionary (ko-dic) for Node.js
43
43
 
44
- - <https://www.npmjs.com/package/lindera-wasm-nodejs-cc-cedict>
44
+ - <https://www.npmjs.com/package/lindera-wasm-nodejs-cc-cedict>
45
45
  Lindera WASM with Chinese dictionary (CC-CEDICT) for Node.js
46
46
 
47
47
  ## Usage
@@ -96,7 +96,6 @@ export default defineConfig({
96
96
  and set the `content_security_policy` to contain `wasm-unsafe-eval` in manifest.json:
97
97
 
98
98
  ```json
99
- // manifest.json
100
99
  "content_security_policy": {
101
100
  "extension_pages": "script-src 'self' 'wasm-unsafe-eval';"
102
101
  }
package/lindera_wasm.d.ts CHANGED
@@ -1,42 +1,6 @@
1
1
  /* tslint:disable */
2
2
  /* eslint-disable */
3
- /**
4
- * Gets the version of the lindera-wasm library.
5
- *
6
- * # Returns
7
- *
8
- * The version string of the library (e.g., "1.0.0").
9
- *
10
- * # Examples
11
- *
12
- * ```javascript
13
- * import { getVersion } from 'lindera-wasm';
14
- * console.log(getVersion()); // "1.0.0"
15
- * ```
16
- */
17
- export function getVersion(): string;
18
- /**
19
- * A tokenizer for morphological analysis.
20
- *
21
- * The `Tokenizer` performs text tokenization based on the configuration
22
- * provided by [`TokenizerBuilder`].
23
- *
24
- * # Examples
25
- *
26
- * ```javascript
27
- * const builder = new TokenizerBuilder();
28
- * builder.setDictionary("embedded://ipadic");
29
- * builder.setMode("normal");
30
- *
31
- * const tokenizer = builder.build();
32
- * const tokens = tokenizer.tokenize("関西国際空港");
33
- * console.log(tokens);
34
- * // Output: [
35
- * // { surface: "関西国際空港", ... },
36
- * // ...
37
- * // ]
38
- * ```
39
- */
3
+
40
4
  export class Tokenizer {
41
5
  private constructor();
42
6
  free(): void;
@@ -74,215 +38,213 @@ export class Tokenizer {
74
38
  */
75
39
  tokenize(input_text: string): any;
76
40
  }
77
- /**
78
- * Builder for creating a [`Tokenizer`] instance.
79
- *
80
- * `TokenizerBuilder` provides a fluent API for configuring and building a tokenizer
81
- * with various options such as dictionary selection, tokenization mode, character filters,
82
- * and token filters.
83
- *
84
- * # Examples
85
- *
86
- * ```javascript
87
- * const builder = new TokenizerBuilder();
88
- * builder.setDictionary("embedded://ipadic");
89
- * builder.setMode("normal");
90
- * builder.setKeepWhitespace(false);
91
- * builder.appendCharacterFilter("unicode_normalize", { "kind": "nfkc" });
92
- * builder.appendTokenFilter("lowercase");
93
- *
94
- * const tokenizer = builder.build();
95
- * ```
96
- */
41
+
97
42
  export class TokenizerBuilder {
98
43
  free(): void;
99
44
  [Symbol.dispose](): void;
100
45
  /**
101
- * Creates a new `TokenizerBuilder` instance.
102
- *
103
- * # Returns
104
- *
105
- * A new `TokenizerBuilder` instance.
46
+ * Sets the dictionary to use for tokenization.
106
47
  *
107
- * # Errors
48
+ * # Parameters
108
49
  *
109
- * Returns an error if the builder cannot be initialized.
50
+ * - `uri`: The dictionary URI. Valid embedded dictionaries are:
51
+ * - `"embedded://ipadic"`: Japanese IPADIC dictionary
52
+ * - `"embedded://unidic"`: Japanese UniDic dictionary
53
+ * - `"embedded://ko-dic"`: Korean ko-dic dictionary
54
+ * - `"embedded://cc-cedict"`: Chinese CC-CEDICT dictionary
110
55
  *
111
56
  * # Examples
112
57
  *
113
58
  * ```javascript
114
- * const builder = new TokenizerBuilder();
59
+ * builder.setDictionary("embedded://ipadic");
115
60
  * ```
116
61
  */
117
- constructor();
62
+ setDictionary(uri: string): void;
118
63
  /**
119
- * Builds and returns a configured [`Tokenizer`] instance.
64
+ * Appends a token filter to the tokenization pipeline.
120
65
  *
121
- * This method consumes the builder and creates the final tokenizer with all
122
- * configured settings.
66
+ * Token filters transform or filter the tokens after tokenization.
123
67
  *
124
- * # Returns
68
+ * # Parameters
125
69
  *
126
- * A configured `Tokenizer` instance.
70
+ * - `name`: The name of the token filter (e.g., `"lowercase"`, `"japanese_number"`).
71
+ * - `args`: A JavaScript object containing filter-specific arguments.
127
72
  *
128
73
  * # Errors
129
74
  *
130
- * Returns an error if the tokenizer cannot be built with the current configuration.
75
+ * Returns an error if the arguments cannot be parsed.
131
76
  *
132
77
  * # Examples
133
78
  *
134
79
  * ```javascript
135
- * const builder = new TokenizerBuilder();
136
- * builder.setDictionary("embedded://ipadic");
137
- * const tokenizer = builder.build();
80
+ * builder.appendTokenFilter("lowercase");
81
+ * builder.appendTokenFilter("japanese_number", { "tags": ["名詞,数"] });
138
82
  * ```
139
83
  */
140
- build(): Tokenizer;
84
+ appendTokenFilter(name: string, args: any): void;
141
85
  /**
142
- * Sets the tokenization mode.
86
+ * Sets whether to keep whitespace tokens in the output.
143
87
  *
144
88
  * # Parameters
145
89
  *
146
- * - `mode`: The tokenization mode. Valid values are:
147
- * - `"normal"`: Standard tokenization
148
- * - `"decompose"`: Decomposes compound words into their components
149
- *
150
- * # Errors
151
- *
152
- * Returns an error if the mode string is invalid.
90
+ * - `keep`: If `true`, whitespace tokens are preserved; if `false`, they are removed.
153
91
  *
154
92
  * # Examples
155
93
  *
156
94
  * ```javascript
157
- * builder.setMode("normal");
95
+ * builder.setKeepWhitespace(false); // Remove whitespace tokens
158
96
  * // or
159
- * builder.setMode("decompose");
97
+ * builder.setKeepWhitespace(true); // Keep whitespace tokens
160
98
  * ```
161
99
  */
162
- setMode(mode: string): void;
100
+ setKeepWhitespace(keep: boolean): void;
163
101
  /**
164
- * Sets the dictionary to use for tokenization.
102
+ * Sets a user-defined dictionary.
103
+ *
104
+ * User dictionaries allow you to add custom words and their properties
105
+ * to supplement the main dictionary.
165
106
  *
166
107
  * # Parameters
167
108
  *
168
- * - `uri`: The dictionary URI. Valid embedded dictionaries are:
169
- * - `"embedded://ipadic"`: Japanese IPADIC dictionary
170
- * - `"embedded://unidic"`: Japanese UniDic dictionary
171
- * - `"embedded://ko-dic"`: Korean ko-dic dictionary
172
- * - `"embedded://cc-cedict"`: Chinese CC-CEDICT dictionary
109
+ * - `uri`: The URI to the user dictionary file.
173
110
  *
174
111
  * # Examples
175
112
  *
176
113
  * ```javascript
177
- * builder.setDictionary("embedded://ipadic");
114
+ * builder.setUserDictionary("path/to/user_dict.csv");
178
115
  * ```
179
116
  */
180
- setDictionary(uri: string): void;
117
+ setUserDictionary(uri: string): void;
181
118
  /**
182
- * Sets a user-defined dictionary.
119
+ * Appends a character filter to the tokenization pipeline.
183
120
  *
184
- * User dictionaries allow you to add custom words and their properties
185
- * to supplement the main dictionary.
121
+ * Character filters transform the input text before tokenization.
186
122
  *
187
123
  * # Parameters
188
124
  *
189
- * - `uri`: The URI to the user dictionary file.
125
+ * - `name`: The name of the character filter (e.g., `"unicode_normalize"`).
126
+ * - `args`: A JavaScript object containing filter-specific arguments.
127
+ *
128
+ * # Errors
129
+ *
130
+ * Returns an error if the arguments cannot be parsed.
190
131
  *
191
132
  * # Examples
192
133
  *
193
134
  * ```javascript
194
- * builder.setUserDictionary("path/to/user_dict.csv");
135
+ * builder.appendCharacterFilter("unicode_normalize", { "kind": "nfkc" });
195
136
  * ```
196
137
  */
197
- setUserDictionary(uri: string): void;
138
+ appendCharacterFilter(name: string, args: any): void;
198
139
  /**
199
- * Sets whether to keep whitespace tokens in the output.
140
+ * Creates a new `TokenizerBuilder` instance.
200
141
  *
201
- * # Parameters
142
+ * # Returns
202
143
  *
203
- * - `keep`: If `true`, whitespace tokens are preserved; if `false`, they are removed.
144
+ * A new `TokenizerBuilder` instance.
145
+ *
146
+ * # Errors
147
+ *
148
+ * Returns an error if the builder cannot be initialized.
204
149
  *
205
150
  * # Examples
206
151
  *
207
152
  * ```javascript
208
- * builder.setKeepWhitespace(false); // Remove whitespace tokens
209
- * // or
210
- * builder.setKeepWhitespace(true); // Keep whitespace tokens
153
+ * const builder = new TokenizerBuilder();
211
154
  * ```
212
155
  */
213
- setKeepWhitespace(keep: boolean): void;
156
+ constructor();
214
157
  /**
215
- * Appends a character filter to the tokenization pipeline.
158
+ * Builds and returns a configured [`Tokenizer`] instance.
216
159
  *
217
- * Character filters transform the input text before tokenization.
160
+ * This method consumes the builder and creates the final tokenizer with all
161
+ * configured settings.
218
162
  *
219
- * # Parameters
163
+ * # Returns
220
164
  *
221
- * - `name`: The name of the character filter (e.g., `"unicode_normalize"`).
222
- * - `args`: A JavaScript object containing filter-specific arguments.
165
+ * A configured `Tokenizer` instance.
223
166
  *
224
167
  * # Errors
225
168
  *
226
- * Returns an error if the arguments cannot be parsed.
169
+ * Returns an error if the tokenizer cannot be built with the current configuration.
227
170
  *
228
171
  * # Examples
229
172
  *
230
173
  * ```javascript
231
- * builder.appendCharacterFilter("unicode_normalize", { "kind": "nfkc" });
174
+ * const builder = new TokenizerBuilder();
175
+ * builder.setDictionary("embedded://ipadic");
176
+ * const tokenizer = builder.build();
232
177
  * ```
233
178
  */
234
- appendCharacterFilter(name: string, args: any): void;
179
+ build(): Tokenizer;
235
180
  /**
236
- * Appends a token filter to the tokenization pipeline.
237
- *
238
- * Token filters transform or filter the tokens after tokenization.
181
+ * Sets the tokenization mode.
239
182
  *
240
183
  * # Parameters
241
184
  *
242
- * - `name`: The name of the token filter (e.g., `"lowercase"`, `"japanese_number"`).
243
- * - `args`: A JavaScript object containing filter-specific arguments.
185
+ * - `mode`: The tokenization mode. Valid values are:
186
+ * - `"normal"`: Standard tokenization
187
+ * - `"decompose"`: Decomposes compound words into their components
244
188
  *
245
189
  * # Errors
246
190
  *
247
- * Returns an error if the arguments cannot be parsed.
191
+ * Returns an error if the mode string is invalid.
248
192
  *
249
193
  * # Examples
250
194
  *
251
195
  * ```javascript
252
- * builder.appendTokenFilter("lowercase");
253
- * builder.appendTokenFilter("japanese_number", { "tags": ["名詞,数"] });
196
+ * builder.setMode("normal");
197
+ * // or
198
+ * builder.setMode("decompose");
254
199
  * ```
255
200
  */
256
- appendTokenFilter(name: string, args: any): void;
201
+ setMode(mode: string): void;
257
202
  }
258
203
 
204
+ /**
205
+ * Gets the version of the lindera-wasm library.
206
+ *
207
+ * # Returns
208
+ *
209
+ * The version string of the library (e.g., "1.0.0").
210
+ *
211
+ * # Examples
212
+ *
213
+ * ```javascript
214
+ * import { getVersion } from 'lindera-wasm';
215
+ * console.log(getVersion()); // "1.0.0"
216
+ * ```
217
+ */
218
+ export function getVersion(): string;
219
+
259
220
  export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
260
221
 
261
222
  export interface InitOutput {
262
223
  readonly memory: WebAssembly.Memory;
263
- readonly getVersion: () => [number, number];
224
+ readonly __wbg_tokenizer_free: (a: number, b: number) => void;
264
225
  readonly __wbg_tokenizerbuilder_free: (a: number, b: number) => void;
265
- readonly tokenizerbuilder_new: () => [number, number, number];
226
+ readonly getVersion: () => [number, number];
227
+ readonly tokenizer_tokenize: (a: number, b: number, c: number) => [number, number, number];
228
+ readonly tokenizerbuilder_appendCharacterFilter: (a: number, b: number, c: number, d: any) => [number, number];
229
+ readonly tokenizerbuilder_appendTokenFilter: (a: number, b: number, c: number, d: any) => [number, number];
266
230
  readonly tokenizerbuilder_build: (a: number) => [number, number, number];
267
- readonly tokenizerbuilder_setMode: (a: number, b: number, c: number) => [number, number];
231
+ readonly tokenizerbuilder_new: () => [number, number, number];
268
232
  readonly tokenizerbuilder_setDictionary: (a: number, b: number, c: number) => [number, number];
269
- readonly tokenizerbuilder_setUserDictionary: (a: number, b: number, c: number) => [number, number];
270
233
  readonly tokenizerbuilder_setKeepWhitespace: (a: number, b: number) => [number, number];
271
- readonly tokenizerbuilder_appendCharacterFilter: (a: number, b: number, c: number, d: any) => [number, number];
272
- readonly tokenizerbuilder_appendTokenFilter: (a: number, b: number, c: number, d: any) => [number, number];
273
- readonly __wbg_tokenizer_free: (a: number, b: number) => void;
274
- readonly tokenizer_tokenize: (a: number, b: number, c: number) => [number, number, number];
234
+ readonly tokenizerbuilder_setMode: (a: number, b: number, c: number) => [number, number];
235
+ readonly tokenizerbuilder_setUserDictionary: (a: number, b: number, c: number) => [number, number];
275
236
  readonly __wbindgen_malloc: (a: number, b: number) => number;
276
237
  readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
277
238
  readonly __wbindgen_exn_store: (a: number) => void;
278
239
  readonly __externref_table_alloc: () => number;
279
- readonly __wbindgen_export_4: WebAssembly.Table;
280
- readonly __wbindgen_free: (a: number, b: number, c: number) => void;
240
+ readonly __wbindgen_externrefs: WebAssembly.Table;
281
241
  readonly __externref_table_dealloc: (a: number) => void;
242
+ readonly __wbindgen_free: (a: number, b: number, c: number) => void;
282
243
  readonly __wbindgen_start: () => void;
283
244
  }
284
245
 
285
246
  export type SyncInitInput = BufferSource | WebAssembly.Module;
247
+
286
248
  /**
287
249
  * Instantiates the given `module`, which can either be bytes or
288
250
  * a precompiled `WebAssembly.Module`.