lindera-wasm-ipadic-web 2.1.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +162 -27
- package/lindera_wasm.d.ts +62 -31
- package/lindera_wasm.js +80 -46
- package/lindera_wasm_bg.wasm +0 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -6,79 +6,214 @@ WebAssembly of Lindera
|
|
|
6
6
|
|
|
7
7
|
## Demo Application
|
|
8
8
|
|
|
9
|
-
- <https://lindera.github.io/lindera
|
|
9
|
+
- <https://lindera.github.io/lindera/demo/>
|
|
10
10
|
|
|
11
11
|
## npm
|
|
12
12
|
|
|
13
|
-
### Web
|
|
13
|
+
### For Web
|
|
14
14
|
|
|
15
|
-
- <https://www.npmjs.com/package/lindera-wasm-web
|
|
15
|
+
- <https://www.npmjs.com/package/lindera-wasm-web>
|
|
16
|
+
Lindera WASM without a dictionary for Web
|
|
17
|
+
|
|
18
|
+
- <https://www.npmjs.com/package/lindera-wasm-cjk-web>
|
|
16
19
|
Lindera WASM with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT) for Web
|
|
17
20
|
|
|
18
|
-
- <https://www.npmjs.com/package/lindera-wasm-web
|
|
21
|
+
- <https://www.npmjs.com/package/lindera-wasm-ipadic-web>
|
|
19
22
|
Lindera WASM with Japanese dictionary (IPADIC) for Web
|
|
20
23
|
|
|
21
|
-
- <https://www.npmjs.com/package/lindera-wasm-web
|
|
24
|
+
- <https://www.npmjs.com/package/lindera-wasm-unidic-web>
|
|
22
25
|
Lindera WASM with Japanese dictionary (UniDic) for Web
|
|
23
26
|
|
|
24
|
-
- <https://www.npmjs.com/package/lindera-wasm-
|
|
27
|
+
- <https://www.npmjs.com/package/lindera-wasm-ko-dic-web>
|
|
25
28
|
Lindera WASM with Korean dictionary (ko-dic) for Web
|
|
26
29
|
|
|
27
|
-
- <https://www.npmjs.com/package/lindera-wasm-
|
|
30
|
+
- <https://www.npmjs.com/package/lindera-wasm-cc-cedict-web>
|
|
28
31
|
Lindera WASM with Chinese dictionary (CC-CEDICT) for Web
|
|
29
32
|
|
|
30
|
-
### Node.js
|
|
33
|
+
### For Node.js
|
|
34
|
+
|
|
35
|
+
- <https://www.npmjs.com/package/lindera-wasm-nodejs>
|
|
36
|
+
Lindera WASM without a dictionary for Node.js
|
|
31
37
|
|
|
32
|
-
- <https://www.npmjs.com/package/lindera-wasm-nodejs
|
|
38
|
+
- <https://www.npmjs.com/package/lindera-wasm-cjk-nodejs>
|
|
33
39
|
Lindera WASM with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT) for Node.js
|
|
34
40
|
|
|
35
|
-
- <https://www.npmjs.com/package/lindera-wasm-nodejs
|
|
41
|
+
- <https://www.npmjs.com/package/lindera-wasm-ipadic-nodejs>
|
|
36
42
|
Lindera WASM with Japanese dictionary (IPADIC) for Node.js
|
|
37
43
|
|
|
38
|
-
- <https://www.npmjs.com/package/lindera-wasm-nodejs
|
|
44
|
+
- <https://www.npmjs.com/package/lindera-wasm-unidic-nodejs>
|
|
39
45
|
Lindera WASM with Japanese dictionary (UniDic) for Node.js
|
|
40
46
|
|
|
41
|
-
- <https://www.npmjs.com/package/lindera-wasm-
|
|
47
|
+
- <https://www.npmjs.com/package/lindera-wasm-ko-dic-nodejs>
|
|
42
48
|
Lindera WASM with Korean dictionary (ko-dic) for Node.js
|
|
43
49
|
|
|
44
|
-
- <https://www.npmjs.com/package/lindera-wasm-
|
|
50
|
+
- <https://www.npmjs.com/package/lindera-wasm-cc-cedict-nodejs>
|
|
45
51
|
Lindera WASM with Chinese dictionary (CC-CEDICT) for Node.js
|
|
46
52
|
|
|
53
|
+
### For bundler
|
|
54
|
+
|
|
55
|
+
- <https://www.npmjs.com/package/lindera-wasm-bundler>
|
|
56
|
+
Lindera WASM without a dictionary for Bundler
|
|
57
|
+
|
|
58
|
+
- <https://www.npmjs.com/package/lindera-wasm-cjk-bundler>
|
|
59
|
+
Lindera WASM with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT) for Bundler
|
|
60
|
+
|
|
61
|
+
- <https://www.npmjs.com/package/lindera-wasm-ipadic-bundler>
|
|
62
|
+
Lindera WASM with Japanese dictionary (IPADIC) for Bundler
|
|
63
|
+
|
|
64
|
+
- <https://www.npmjs.com/package/lindera-wasm-unidic-bundler>
|
|
65
|
+
Lindera WASM with Japanese dictionary (UniDic) for Bundler
|
|
66
|
+
|
|
67
|
+
- <https://www.npmjs.com/package/lindera-wasm-ko-dic-bundler>
|
|
68
|
+
Lindera WASM with Korean dictionary (ko-dic) for Bundler
|
|
69
|
+
|
|
70
|
+
- <https://www.npmjs.com/package/lindera-wasm-cc-cedict-bundler>
|
|
71
|
+
Lindera WASM with Chinese dictionary (CC-CEDICT) for Bundler
|
|
72
|
+
|
|
47
73
|
## Usage
|
|
48
74
|
|
|
49
|
-
|
|
75
|
+
### Web Usage
|
|
50
76
|
|
|
51
|
-
|
|
52
|
-
import __wbg_init, { TokenizerBuilder } from 'lindera-wasm'
|
|
77
|
+
Use the `-web` packages for browser environments with `<script type="module">`:
|
|
53
78
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
79
|
+
```html
|
|
80
|
+
<script type="module">
|
|
81
|
+
import __wbg_init, { TokenizerBuilder } from 'https://cdn.jsdelivr.net/npm/lindera-wasm-ipadic-web/lindera_wasm.js';
|
|
82
|
+
|
|
83
|
+
__wbg_init().then(() => {
|
|
84
|
+
const builder = new TokenizerBuilder();
|
|
85
|
+
builder.setDictionary("embedded://ipadic");
|
|
86
|
+
builder.setMode("normal");
|
|
87
|
+
const tokenizer = builder.build();
|
|
88
|
+
|
|
89
|
+
const tokens = tokenizer.tokenize("すもももももももものうち");
|
|
90
|
+
tokens.forEach(token => {
|
|
91
|
+
console.log(`${token.surface}: ${token.details.join(", ")}`);
|
|
92
|
+
});
|
|
93
|
+
});
|
|
94
|
+
</script>
|
|
58
95
|
```
|
|
59
96
|
|
|
60
|
-
|
|
97
|
+
Or with a bundler:
|
|
98
|
+
|
|
99
|
+
```js
|
|
100
|
+
import __wbg_init, { TokenizerBuilder } from 'lindera-wasm-ipadic-web';
|
|
101
|
+
|
|
102
|
+
async function main() {
|
|
103
|
+
await __wbg_init();
|
|
104
|
+
|
|
105
|
+
const builder = new TokenizerBuilder();
|
|
106
|
+
builder.setDictionary("embedded://ipadic");
|
|
107
|
+
builder.setMode("normal");
|
|
108
|
+
const tokenizer = builder.build();
|
|
109
|
+
|
|
110
|
+
const tokens = tokenizer.tokenize("すもももももももものうち");
|
|
111
|
+
tokens.forEach(token => {
|
|
112
|
+
console.log(`${token.surface}: ${token.details.join(", ")}`);
|
|
113
|
+
});
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
main();
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Node.js Usage
|
|
120
|
+
|
|
121
|
+
Use the `-nodejs` packages for Node.js environments:
|
|
122
|
+
|
|
123
|
+
```js
|
|
124
|
+
const { TokenizerBuilder } = require('lindera-wasm-ipadic-nodejs');
|
|
125
|
+
|
|
126
|
+
const builder = new TokenizerBuilder();
|
|
127
|
+
builder.setDictionary("embedded://ipadic");
|
|
128
|
+
builder.setMode("normal");
|
|
129
|
+
const tokenizer = builder.build();
|
|
130
|
+
|
|
131
|
+
const tokens = tokenizer.tokenize("すもももももももものうち");
|
|
132
|
+
tokens.forEach(token => {
|
|
133
|
+
console.log(`${token.surface}: ${token.details.join(", ")}`);
|
|
134
|
+
});
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Or with ESM:
|
|
138
|
+
|
|
139
|
+
```js
|
|
140
|
+
import { TokenizerBuilder } from 'lindera-wasm-ipadic-nodejs';
|
|
141
|
+
|
|
142
|
+
const builder = new TokenizerBuilder();
|
|
143
|
+
builder.setDictionary("embedded://ipadic");
|
|
144
|
+
builder.setMode("normal");
|
|
145
|
+
const tokenizer = builder.build();
|
|
146
|
+
|
|
147
|
+
const tokens = tokenizer.tokenize("すもももももももものうち");
|
|
148
|
+
tokens.forEach(token => {
|
|
149
|
+
console.log(`${token.surface}: ${token.details.join(", ")}`);
|
|
150
|
+
});
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Bundler Usage (Webpack, Rollup, etc.)
|
|
154
|
+
|
|
155
|
+
Use the `-bundler` packages for bundler environments:
|
|
156
|
+
|
|
157
|
+
```js
|
|
158
|
+
import __wbg_init, { TokenizerBuilder } from 'lindera-wasm-ipadic-bundler';
|
|
159
|
+
|
|
160
|
+
async function main() {
|
|
161
|
+
await __wbg_init();
|
|
162
|
+
|
|
163
|
+
const builder = new TokenizerBuilder();
|
|
164
|
+
builder.setDictionary("embedded://ipadic");
|
|
165
|
+
builder.setMode("normal");
|
|
166
|
+
const tokenizer = builder.build();
|
|
167
|
+
|
|
168
|
+
const tokens = tokenizer.tokenize("すもももももももものうち");
|
|
169
|
+
tokens.forEach(token => {
|
|
170
|
+
console.log(`${token.surface}: ${token.details.join(", ")}`);
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
main();
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Token Properties
|
|
178
|
+
|
|
179
|
+
Each token object has the following properties:
|
|
180
|
+
|
|
181
|
+
| Property | Type | Description |
|
|
182
|
+
| -------- | ---- | ----------- |
|
|
183
|
+
| `surface` | `string` | Surface form of the token |
|
|
184
|
+
| `byteStart` | `number` | Start byte position in the original text |
|
|
185
|
+
| `byteEnd` | `number` | End byte position in the original text |
|
|
186
|
+
| `position` | `number` | Position index of the token |
|
|
187
|
+
| `wordId` | `number` | Word ID in the dictionary |
|
|
188
|
+
| `details` | `string[]` | Morphological details array |
|
|
189
|
+
|
|
190
|
+
Methods:
|
|
191
|
+
|
|
192
|
+
- `getDetail(index)`: Returns the detail at the specified index, or `undefined` if not found
|
|
193
|
+
- `toJSON()`: Returns the token as a plain JavaScript object
|
|
194
|
+
|
|
195
|
+
### For Vite Projects
|
|
61
196
|
|
|
62
197
|
Exclude this package from dependency optimization by listing it under `optimizeDeps.exclude`:
|
|
63
198
|
|
|
64
|
-
```
|
|
199
|
+
```js
|
|
65
200
|
// vite.config.js
|
|
66
201
|
import { defineConfig } from 'vite'
|
|
67
202
|
|
|
68
203
|
export default defineConfig({
|
|
69
204
|
optimizeDeps: {
|
|
70
205
|
exclude: [
|
|
71
|
-
"lindera-wasm"
|
|
206
|
+
"lindera-wasm-ipadic-web"
|
|
72
207
|
]
|
|
73
208
|
},
|
|
74
209
|
})
|
|
75
210
|
```
|
|
76
211
|
|
|
77
|
-
###
|
|
212
|
+
### For Browser Extension Development
|
|
78
213
|
|
|
79
|
-
Set the `cors` config in vite.config.js
|
|
214
|
+
Set the `cors` config in vite.config.js:
|
|
80
215
|
|
|
81
|
-
```
|
|
216
|
+
```js
|
|
82
217
|
// vite.config.js
|
|
83
218
|
import { defineConfig } from 'vite'
|
|
84
219
|
|
|
@@ -93,7 +228,7 @@ export default defineConfig({
|
|
|
93
228
|
})
|
|
94
229
|
```
|
|
95
230
|
|
|
96
|
-
|
|
231
|
+
Also set `content_security_policy` in manifest.json so that it includes `wasm-unsafe-eval`:
|
|
97
232
|
|
|
98
233
|
```json
|
|
99
234
|
"content_security_policy": {
|
package/lindera_wasm.d.ts
CHANGED
|
@@ -138,6 +138,19 @@ export class Token {
|
|
|
138
138
|
private constructor();
|
|
139
139
|
free(): void;
|
|
140
140
|
[Symbol.dispose](): void;
|
|
141
|
+
/**
|
|
142
|
+
* Returns the detail at the specified index.
|
|
143
|
+
*
|
|
144
|
+
* # Parameters
|
|
145
|
+
*
|
|
146
|
+
* - `index`: Index of the detail to retrieve.
|
|
147
|
+
*
|
|
148
|
+
* # Returns
|
|
149
|
+
*
|
|
150
|
+
* The detail string if found, otherwise undefined.
|
|
151
|
+
*/
|
|
152
|
+
getDetail(index: number): string | undefined;
|
|
153
|
+
toJSON(): any;
|
|
141
154
|
/**
|
|
142
155
|
* End byte position in the original text.
|
|
143
156
|
*/
|
|
@@ -150,6 +163,10 @@ export class Token {
|
|
|
150
163
|
* Morphological details of the token.
|
|
151
164
|
*/
|
|
152
165
|
details: string[];
|
|
166
|
+
/**
|
|
167
|
+
* Whether this token is an unknown word (not found in the dictionary).
|
|
168
|
+
*/
|
|
169
|
+
is_unknown: boolean;
|
|
153
170
|
/**
|
|
154
171
|
* Position index of the token.
|
|
155
172
|
*/
|
|
@@ -175,6 +192,16 @@ export class Tokenizer {
|
|
|
175
192
|
* Tokenizes the input text.
|
|
176
193
|
*/
|
|
177
194
|
tokenize(input_text: string): Token[];
|
|
195
|
+
/**
|
|
196
|
+
* Tokenizes the input text and returns N-best results.
|
|
197
|
+
*
|
|
198
|
+
* Returns an array of arrays, where each inner array contains Token JSON objects.
|
|
199
|
+
*/
|
|
200
|
+
tokenizeNbest(input_text: string, n: number, unique?: boolean | null, cost_threshold?: bigint | null): any;
|
|
201
|
+
/**
|
|
202
|
+
* Tokenizes the input text and returns N-best results (snake_case alias).
|
|
203
|
+
*/
|
|
204
|
+
tokenize_nbest(input_text: string, n: number, unique?: boolean | null, cost_threshold?: bigint | null): any;
|
|
178
205
|
}
|
|
179
206
|
|
|
180
207
|
/**
|
|
@@ -279,10 +306,44 @@ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembl
|
|
|
279
306
|
|
|
280
307
|
export interface InitOutput {
|
|
281
308
|
readonly memory: WebAssembly.Memory;
|
|
309
|
+
readonly __wbg_tokenizer_free: (a: number, b: number) => void;
|
|
310
|
+
readonly __wbg_tokenizerbuilder_free: (a: number, b: number) => void;
|
|
311
|
+
readonly tokenizer_new: (a: number, b: number, c: number, d: number) => [number, number, number];
|
|
312
|
+
readonly tokenizer_tokenize: (a: number, b: number, c: number) => [number, number, number, number];
|
|
313
|
+
readonly tokenizer_tokenizeNbest: (a: number, b: number, c: number, d: number, e: number, f: number, g: bigint) => [number, number, number];
|
|
314
|
+
readonly tokenizer_tokenize_nbest: (a: number, b: number, c: number, d: number, e: number, f: number, g: bigint) => [number, number, number];
|
|
315
|
+
readonly tokenizerbuilder_appendCharacterFilter: (a: number, b: number, c: number, d: any) => [number, number];
|
|
316
|
+
readonly tokenizerbuilder_appendTokenFilter: (a: number, b: number, c: number, d: any) => [number, number];
|
|
317
|
+
readonly tokenizerbuilder_append_character_filter: (a: number, b: number, c: number, d: any) => [number, number];
|
|
318
|
+
readonly tokenizerbuilder_append_token_filter: (a: number, b: number, c: number, d: any) => [number, number];
|
|
319
|
+
readonly tokenizerbuilder_build: (a: number) => [number, number, number];
|
|
320
|
+
readonly tokenizerbuilder_new: () => [number, number, number];
|
|
321
|
+
readonly tokenizerbuilder_setDictionary: (a: number, b: number, c: number) => [number, number];
|
|
322
|
+
readonly tokenizerbuilder_setKeepWhitespace: (a: number, b: number) => [number, number];
|
|
323
|
+
readonly tokenizerbuilder_setMode: (a: number, b: number, c: number) => [number, number];
|
|
324
|
+
readonly tokenizerbuilder_setUserDictionary: (a: number, b: number, c: number) => [number, number];
|
|
325
|
+
readonly tokenizerbuilder_set_dictionary: (a: number, b: number, c: number) => [number, number];
|
|
326
|
+
readonly tokenizerbuilder_set_keep_whitespace: (a: number, b: number) => [number, number];
|
|
327
|
+
readonly tokenizerbuilder_set_mode: (a: number, b: number, c: number) => [number, number];
|
|
328
|
+
readonly tokenizerbuilder_set_user_dictionary: (a: number, b: number, c: number) => [number, number];
|
|
329
|
+
readonly __wbg_get_token_byte_end: (a: number) => number;
|
|
330
|
+
readonly __wbg_get_token_details: (a: number) => [number, number];
|
|
331
|
+
readonly __wbg_get_token_is_unknown: (a: number) => number;
|
|
332
|
+
readonly __wbg_get_token_position: (a: number) => number;
|
|
333
|
+
readonly __wbg_get_token_word_id: (a: number) => number;
|
|
282
334
|
readonly __wbg_linderaerror_free: (a: number, b: number) => void;
|
|
283
|
-
readonly
|
|
335
|
+
readonly __wbg_set_token_byte_end: (a: number, b: number) => void;
|
|
336
|
+
readonly __wbg_set_token_details: (a: number, b: number, c: number) => void;
|
|
337
|
+
readonly __wbg_set_token_is_unknown: (a: number, b: number) => void;
|
|
338
|
+
readonly __wbg_set_token_position: (a: number, b: number) => void;
|
|
339
|
+
readonly __wbg_set_token_word_id: (a: number, b: number) => void;
|
|
340
|
+
readonly __wbg_token_free: (a: number, b: number) => void;
|
|
284
341
|
readonly jslinderaerror_new: (a: number, b: number) => number;
|
|
285
342
|
readonly jslinderaerror_toString: (a: number) => [number, number];
|
|
343
|
+
readonly token_getDetail: (a: number, b: number) => [number, number];
|
|
344
|
+
readonly token_toJSON: (a: number) => any;
|
|
345
|
+
readonly __wbg_metadata_free: (a: number, b: number) => void;
|
|
346
|
+
readonly __wbg_segmenter_free: (a: number, b: number) => void;
|
|
286
347
|
readonly jsmetadata_compress_algorithm: (a: number) => number;
|
|
287
348
|
readonly jsmetadata_createDefault: () => number;
|
|
288
349
|
readonly jsmetadata_dictionary_schema: (a: number) => number;
|
|
@@ -295,23 +356,11 @@ export interface InitOutput {
|
|
|
295
356
|
readonly jsmetadata_set_name: (a: number, b: number, c: number) => void;
|
|
296
357
|
readonly jsmetadata_set_user_dictionary_schema: (a: number, b: number) => void;
|
|
297
358
|
readonly jsmetadata_user_dictionary_schema: (a: number) => number;
|
|
298
|
-
readonly __wbg_get_token_byte_end: (a: number) => number;
|
|
299
|
-
readonly __wbg_get_token_details: (a: number) => [number, number];
|
|
300
|
-
readonly __wbg_get_token_position: (a: number) => number;
|
|
301
|
-
readonly __wbg_get_token_word_id: (a: number) => number;
|
|
302
|
-
readonly __wbg_set_token_byte_end: (a: number, b: number) => void;
|
|
303
|
-
readonly __wbg_set_token_details: (a: number, b: number, c: number) => void;
|
|
304
|
-
readonly __wbg_set_token_position: (a: number, b: number) => void;
|
|
305
|
-
readonly __wbg_set_token_word_id: (a: number, b: number) => void;
|
|
306
|
-
readonly __wbg_token_free: (a: number, b: number) => void;
|
|
307
|
-
readonly jstoken_getDetail: (a: number, b: number) => [number, number];
|
|
308
|
-
readonly jstoken_toJSON: (a: number) => any;
|
|
309
359
|
readonly __wbg_get_penalty_kanji_penalty_length_penalty: (a: number) => number;
|
|
310
360
|
readonly __wbg_get_penalty_kanji_penalty_length_threshold: (a: number) => number;
|
|
311
361
|
readonly __wbg_get_penalty_other_penalty_length_penalty: (a: number) => number;
|
|
312
362
|
readonly __wbg_get_penalty_other_penalty_length_threshold: (a: number) => number;
|
|
313
363
|
readonly __wbg_penalty_free: (a: number, b: number) => void;
|
|
314
|
-
readonly __wbg_segmenter_free: (a: number, b: number) => void;
|
|
315
364
|
readonly __wbg_set_penalty_kanji_penalty_length_penalty: (a: number, b: number) => void;
|
|
316
365
|
readonly __wbg_set_penalty_kanji_penalty_length_threshold: (a: number, b: number) => void;
|
|
317
366
|
readonly __wbg_set_penalty_other_penalty_length_penalty: (a: number, b: number) => void;
|
|
@@ -322,24 +371,6 @@ export interface InitOutput {
|
|
|
322
371
|
readonly jspenalty_new: (a: number, b: number, c: number, d: number) => number;
|
|
323
372
|
readonly load_dictionary: (a: number, b: number) => [number, number, number];
|
|
324
373
|
readonly load_user_dictionary: (a: number, b: number, c: number) => [number, number, number];
|
|
325
|
-
readonly __wbg_tokenizer_free: (a: number, b: number) => void;
|
|
326
|
-
readonly __wbg_tokenizerbuilder_free: (a: number, b: number) => void;
|
|
327
|
-
readonly tokenizer_new: (a: number, b: number, c: number, d: number) => [number, number, number];
|
|
328
|
-
readonly tokenizer_tokenize: (a: number, b: number, c: number) => [number, number, number, number];
|
|
329
|
-
readonly tokenizerbuilder_appendCharacterFilter: (a: number, b: number, c: number, d: any) => [number, number];
|
|
330
|
-
readonly tokenizerbuilder_appendTokenFilter: (a: number, b: number, c: number, d: any) => [number, number];
|
|
331
|
-
readonly tokenizerbuilder_append_character_filter: (a: number, b: number, c: number, d: any) => [number, number];
|
|
332
|
-
readonly tokenizerbuilder_append_token_filter: (a: number, b: number, c: number, d: any) => [number, number];
|
|
333
|
-
readonly tokenizerbuilder_build: (a: number) => [number, number, number];
|
|
334
|
-
readonly tokenizerbuilder_new: () => [number, number, number];
|
|
335
|
-
readonly tokenizerbuilder_setDictionary: (a: number, b: number, c: number) => [number, number];
|
|
336
|
-
readonly tokenizerbuilder_setKeepWhitespace: (a: number, b: number) => [number, number];
|
|
337
|
-
readonly tokenizerbuilder_setMode: (a: number, b: number, c: number) => [number, number];
|
|
338
|
-
readonly tokenizerbuilder_setUserDictionary: (a: number, b: number, c: number) => [number, number];
|
|
339
|
-
readonly tokenizerbuilder_set_dictionary: (a: number, b: number, c: number) => [number, number];
|
|
340
|
-
readonly tokenizerbuilder_set_keep_whitespace: (a: number, b: number) => [number, number];
|
|
341
|
-
readonly tokenizerbuilder_set_mode: (a: number, b: number, c: number) => [number, number];
|
|
342
|
-
readonly tokenizerbuilder_set_user_dictionary: (a: number, b: number, c: number) => [number, number];
|
|
343
374
|
readonly __wbg_fielddefinition_free: (a: number, b: number) => void;
|
|
344
375
|
readonly __wbg_get_fielddefinition_description: (a: number) => [number, number];
|
|
345
376
|
readonly __wbg_get_fielddefinition_field_type: (a: number) => number;
|
package/lindera_wasm.js
CHANGED
|
@@ -507,49 +507,6 @@ export class JsSchema {
|
|
|
507
507
|
}
|
|
508
508
|
if (Symbol.dispose) JsSchema.prototype[Symbol.dispose] = JsSchema.prototype.free;
|
|
509
509
|
|
|
510
|
-
export class JsToken {
|
|
511
|
-
__destroy_into_raw() {
|
|
512
|
-
const ptr = this.__wbg_ptr;
|
|
513
|
-
this.__wbg_ptr = 0;
|
|
514
|
-
JsTokenFinalization.unregister(this);
|
|
515
|
-
return ptr;
|
|
516
|
-
}
|
|
517
|
-
free() {
|
|
518
|
-
const ptr = this.__destroy_into_raw();
|
|
519
|
-
wasm.__wbg_jstoken_free(ptr, 0);
|
|
520
|
-
}
|
|
521
|
-
/**
|
|
522
|
-
* Returns the detail at the specified index.
|
|
523
|
-
*
|
|
524
|
-
* # Parameters
|
|
525
|
-
*
|
|
526
|
-
* - `index`: Index of the detail to retrieve.
|
|
527
|
-
*
|
|
528
|
-
* # Returns
|
|
529
|
-
*
|
|
530
|
-
* The detail string if found, otherwise undefined.
|
|
531
|
-
* @param {number} index
|
|
532
|
-
* @returns {string | undefined}
|
|
533
|
-
*/
|
|
534
|
-
getDetail(index) {
|
|
535
|
-
const ret = wasm.jstoken_getDetail(this.__wbg_ptr, index);
|
|
536
|
-
let v1;
|
|
537
|
-
if (ret[0] !== 0) {
|
|
538
|
-
v1 = getStringFromWasm0(ret[0], ret[1]).slice();
|
|
539
|
-
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
540
|
-
}
|
|
541
|
-
return v1;
|
|
542
|
-
}
|
|
543
|
-
/**
|
|
544
|
-
* @returns {any}
|
|
545
|
-
*/
|
|
546
|
-
toJSON() {
|
|
547
|
-
const ret = wasm.jstoken_toJSON(this.__wbg_ptr);
|
|
548
|
-
return ret;
|
|
549
|
-
}
|
|
550
|
-
}
|
|
551
|
-
if (Symbol.dispose) JsToken.prototype[Symbol.dispose] = JsToken.prototype.free;
|
|
552
|
-
|
|
553
510
|
/**
|
|
554
511
|
* Error type for Lindera operations.
|
|
555
512
|
*/
|
|
@@ -807,6 +764,14 @@ export class Token {
|
|
|
807
764
|
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
|
808
765
|
return v1;
|
|
809
766
|
}
|
|
767
|
+
/**
|
|
768
|
+
* Whether this token is an unknown word (not found in the dictionary).
|
|
769
|
+
* @returns {boolean}
|
|
770
|
+
*/
|
|
771
|
+
get is_unknown() {
|
|
772
|
+
const ret = wasm.__wbg_get_token_is_unknown(this.__wbg_ptr);
|
|
773
|
+
return ret !== 0;
|
|
774
|
+
}
|
|
810
775
|
/**
|
|
811
776
|
* Position index of the token.
|
|
812
777
|
* @returns {number}
|
|
@@ -862,6 +827,13 @@ export class Token {
|
|
|
862
827
|
const len0 = WASM_VECTOR_LEN;
|
|
863
828
|
wasm.__wbg_set_token_details(this.__wbg_ptr, ptr0, len0);
|
|
864
829
|
}
|
|
830
|
+
/**
|
|
831
|
+
* Whether this token is an unknown word (not found in the dictionary).
|
|
832
|
+
* @param {boolean} arg0
|
|
833
|
+
*/
|
|
834
|
+
set is_unknown(arg0) {
|
|
835
|
+
wasm.__wbg_set_token_is_unknown(this.__wbg_ptr, arg0);
|
|
836
|
+
}
|
|
865
837
|
/**
|
|
866
838
|
* Position index of the token.
|
|
867
839
|
* @param {number} arg0
|
|
@@ -885,6 +857,35 @@ export class Token {
|
|
|
885
857
|
set word_id(arg0) {
|
|
886
858
|
wasm.__wbg_set_token_word_id(this.__wbg_ptr, arg0);
|
|
887
859
|
}
|
|
860
|
+
/**
|
|
861
|
+
* Returns the detail at the specified index.
|
|
862
|
+
*
|
|
863
|
+
* # Parameters
|
|
864
|
+
*
|
|
865
|
+
* - `index`: Index of the detail to retrieve.
|
|
866
|
+
*
|
|
867
|
+
* # Returns
|
|
868
|
+
*
|
|
869
|
+
* The detail string if found, otherwise undefined.
|
|
870
|
+
* @param {number} index
|
|
871
|
+
* @returns {string | undefined}
|
|
872
|
+
*/
|
|
873
|
+
getDetail(index) {
|
|
874
|
+
const ret = wasm.token_getDetail(this.__wbg_ptr, index);
|
|
875
|
+
let v1;
|
|
876
|
+
if (ret[0] !== 0) {
|
|
877
|
+
v1 = getStringFromWasm0(ret[0], ret[1]).slice();
|
|
878
|
+
wasm.__wbindgen_free(ret[0], ret[1] * 1, 1);
|
|
879
|
+
}
|
|
880
|
+
return v1;
|
|
881
|
+
}
|
|
882
|
+
/**
|
|
883
|
+
* @returns {any}
|
|
884
|
+
*/
|
|
885
|
+
toJSON() {
|
|
886
|
+
const ret = wasm.token_toJSON(this.__wbg_ptr);
|
|
887
|
+
return ret;
|
|
888
|
+
}
|
|
888
889
|
}
|
|
889
890
|
if (Symbol.dispose) Token.prototype[Symbol.dispose] = Token.prototype.free;
|
|
890
891
|
|
|
@@ -948,6 +949,42 @@ export class Tokenizer {
|
|
|
948
949
|
wasm.__wbindgen_free(ret[0], ret[1] * 4, 4);
|
|
949
950
|
return v2;
|
|
950
951
|
}
|
|
952
|
+
/**
|
|
953
|
+
* Tokenizes the input text and returns N-best results.
|
|
954
|
+
*
|
|
955
|
+
* Returns an array of arrays, where each inner array contains Token JSON objects.
|
|
956
|
+
* @param {string} input_text
|
|
957
|
+
* @param {number} n
|
|
958
|
+
* @param {boolean | null} [unique]
|
|
959
|
+
* @param {bigint | null} [cost_threshold]
|
|
960
|
+
* @returns {any}
|
|
961
|
+
*/
|
|
962
|
+
tokenizeNbest(input_text, n, unique, cost_threshold) {
|
|
963
|
+
const ptr0 = passStringToWasm0(input_text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
964
|
+
const len0 = WASM_VECTOR_LEN;
|
|
965
|
+
const ret = wasm.tokenizer_tokenizeNbest(this.__wbg_ptr, ptr0, len0, n, isLikeNone(unique) ? 0xFFFFFF : unique ? 1 : 0, !isLikeNone(cost_threshold), isLikeNone(cost_threshold) ? BigInt(0) : cost_threshold);
|
|
966
|
+
if (ret[2]) {
|
|
967
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
968
|
+
}
|
|
969
|
+
return takeFromExternrefTable0(ret[0]);
|
|
970
|
+
}
|
|
971
|
+
/**
|
|
972
|
+
* Tokenizes the input text and returns N-best results (snake_case alias).
|
|
973
|
+
* @param {string} input_text
|
|
974
|
+
* @param {number} n
|
|
975
|
+
* @param {boolean | null} [unique]
|
|
976
|
+
* @param {bigint | null} [cost_threshold]
|
|
977
|
+
* @returns {any}
|
|
978
|
+
*/
|
|
979
|
+
tokenize_nbest(input_text, n, unique, cost_threshold) {
|
|
980
|
+
const ptr0 = passStringToWasm0(input_text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
981
|
+
const len0 = WASM_VECTOR_LEN;
|
|
982
|
+
const ret = wasm.tokenizer_tokenize_nbest(this.__wbg_ptr, ptr0, len0, n, isLikeNone(unique) ? 0xFFFFFF : unique ? 1 : 0, !isLikeNone(cost_threshold), isLikeNone(cost_threshold) ? BigInt(0) : cost_threshold);
|
|
983
|
+
if (ret[2]) {
|
|
984
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
985
|
+
}
|
|
986
|
+
return takeFromExternrefTable0(ret[0]);
|
|
987
|
+
}
|
|
951
988
|
}
|
|
952
989
|
if (Symbol.dispose) Tokenizer.prototype[Symbol.dispose] = Tokenizer.prototype.free;
|
|
953
990
|
|
|
@@ -1588,9 +1625,6 @@ const JsPenaltyFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
|
1588
1625
|
const JsSchemaFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
1589
1626
|
? { register: () => {}, unregister: () => {} }
|
|
1590
1627
|
: new FinalizationRegistry(ptr => wasm.__wbg_jsschema_free(ptr >>> 0, 1));
|
|
1591
|
-
const JsTokenFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
1592
|
-
? { register: () => {}, unregister: () => {} }
|
|
1593
|
-
: new FinalizationRegistry(ptr => wasm.__wbg_jstoken_free(ptr >>> 0, 1));
|
|
1594
1628
|
const LinderaErrorFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
1595
1629
|
? { register: () => {}, unregister: () => {} }
|
|
1596
1630
|
: new FinalizationRegistry(ptr => wasm.__wbg_linderaerror_free(ptr >>> 0, 1));
|
package/lindera_wasm_bg.wasm
CHANGED
|
Binary file
|