lindera-wasm-nodejs-cc-cedict 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +135 -0
- package/lindera_wasm.d.ts +257 -0
- package/lindera_wasm.js +820 -0
- package/lindera_wasm_bg.wasm +0 -0
- package/package.json +25 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 by the project authors.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# lindera-wasm
|
|
2
|
+
|
|
3
|
+
WebAssembly of Lindera
|
|
4
|
+
|
|
5
|
+

|
|
6
|
+
|
|
7
|
+
## Demo Application
|
|
8
|
+
|
|
9
|
+
- <https://lindera.github.io/lindera-wasm/>
|
|
10
|
+
|
|
11
|
+
## npm
|
|
12
|
+
|
|
13
|
+
### Web
|
|
14
|
+
|
|
15
|
+
- <https://www.npmjs.com/package/lindera-wasm-web-cjk>
|
|
16
|
+
Lindera WASM with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT) for Web
|
|
17
|
+
|
|
18
|
+
- <https://www.npmjs.com/package/lindera-wasm-web-ipadic>
|
|
19
|
+
Lindera WASM with Japanese dictionary (IPADIC) for Web
|
|
20
|
+
|
|
21
|
+
- <https://www.npmjs.com/package/lindera-wasm-web-unidic>
|
|
22
|
+
Lindera WASM with Japanese dictionary (UniDic) for Web
|
|
23
|
+
|
|
24
|
+
- <https://www.npmjs.com/package/lindera-wasm-web-ko-dic>
|
|
25
|
+
Lindera WASM with Korean dictionary (ko-dic) for Web
|
|
26
|
+
|
|
27
|
+
- <https://www.npmjs.com/package/lindera-wasm-web-cc-cedict>
|
|
28
|
+
Lindera WASM with Chinese dictionary (CC-CEDICT) for Web
|
|
29
|
+
|
|
30
|
+
### Node.js
|
|
31
|
+
|
|
32
|
+
- <https://www.npmjs.com/package/lindera-wasm-nodejs-cjk>
|
|
33
|
+
Lindera WASM with CJK dictionaries (IPADIC, ko-dic, CC-CEDICT) for Node.js
|
|
34
|
+
|
|
35
|
+
- <https://www.npmjs.com/package/lindera-wasm-nodejs-ipadic>
|
|
36
|
+
Lindera WASM with Japanese dictionary (IPADIC) for Node.js
|
|
37
|
+
|
|
38
|
+
- <https://www.npmjs.com/package/lindera-wasm-nodejs-unidic>
|
|
39
|
+
Lindera WASM with Japanese dictionary (UniDic) for Node.js
|
|
40
|
+
|
|
41
|
+
- <https://www.npmjs.com/package/lindera-wasm-nodejs-ko-dic>
|
|
42
|
+
Lindera WASM with Korean dictionary (ko-dic) for Node.js
|
|
43
|
+
|
|
44
|
+
- <https://www.npmjs.com/package/lindera-wasm-nodejs-cc-cedict>
|
|
45
|
+
Lindera WASM with Chinese dictionary (CC-CEDICT) for Node.js
|
|
46
|
+
|
|
47
|
+
## Usage
|
|
48
|
+
|
|
49
|
+
init the wasm module before construct `TokenizerBuilder`:
|
|
50
|
+
|
|
51
|
+
```ts
|
|
52
|
+
import __wbg_init, { TokenizerBuilder } from 'lindera-wasm'
|
|
53
|
+
|
|
54
|
+
__wbg_init.then(() => {
|
|
55
|
+
const builder = new TokenizerBuilder()
|
|
56
|
+
//...
|
|
57
|
+
})
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### for [Vite](https://vite.dev/) base project
|
|
61
|
+
|
|
62
|
+
You should exclude this package in the `optimizeDeps`:
|
|
63
|
+
|
|
64
|
+
```ts
|
|
65
|
+
// vite.config.js
|
|
66
|
+
import { defineConfig } from 'vite'
|
|
67
|
+
|
|
68
|
+
export default defineConfig({
|
|
69
|
+
optimizeDeps: {
|
|
70
|
+
exclude: [
|
|
71
|
+
"lindera-wasm"
|
|
72
|
+
]
|
|
73
|
+
},
|
|
74
|
+
})
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### for Browser extension development
|
|
78
|
+
|
|
79
|
+
Set the `cors` config in vite.config.js
|
|
80
|
+
|
|
81
|
+
```ts
|
|
82
|
+
// vite.config.js
|
|
83
|
+
import { defineConfig } from 'vite'
|
|
84
|
+
|
|
85
|
+
export default defineConfig({
|
|
86
|
+
server: {
|
|
87
|
+
cors: {
|
|
88
|
+
origin: [
|
|
89
|
+
/chrome-extension:\/\//,
|
|
90
|
+
],
|
|
91
|
+
},
|
|
92
|
+
},
|
|
93
|
+
})
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
and set the `content_security_policy` to contains `wasm-unsafe-eval` in manifest.json:
|
|
97
|
+
|
|
98
|
+
```json
|
|
99
|
+
// manifest.json
|
|
100
|
+
"content_security_policy": {
|
|
101
|
+
"extension_pages": "script-src 'self' 'wasm-unsafe-eval';"
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Development
|
|
106
|
+
|
|
107
|
+
### Install project dependencies
|
|
108
|
+
|
|
109
|
+
- wasm-pack : <https://rustwasm.github.io/wasm-pack/installer/>
|
|
110
|
+
|
|
111
|
+
### Setup repository
|
|
112
|
+
|
|
113
|
+
```shell
|
|
114
|
+
# Clone lindera-py project repository
|
|
115
|
+
% git clone git@github.com:lindera/lindera-wasm.git
|
|
116
|
+
% cd lindera-wasm
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Build project
|
|
120
|
+
|
|
121
|
+
```shell
|
|
122
|
+
% wasm-pack build --release --features=cjk --target=bundler
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Build example web application
|
|
126
|
+
|
|
127
|
+
```shell
|
|
128
|
+
% cd lindera-wasm && npm install && npm run build && cp index.html dist/index.html
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Run example web application
|
|
132
|
+
|
|
133
|
+
```shell
|
|
134
|
+
% cd lindera-wasm && npm run start
|
|
135
|
+
```
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
/* tslint:disable */
|
|
2
|
+
/* eslint-disable */
|
|
3
|
+
/**
|
|
4
|
+
* Gets the version of the lindera-wasm library.
|
|
5
|
+
*
|
|
6
|
+
* # Returns
|
|
7
|
+
*
|
|
8
|
+
* The version string of the library (e.g., "1.0.0").
|
|
9
|
+
*
|
|
10
|
+
* # Examples
|
|
11
|
+
*
|
|
12
|
+
* ```javascript
|
|
13
|
+
* import { getVersion } from 'lindera-wasm';
|
|
14
|
+
* console.log(getVersion()); // "1.0.0"
|
|
15
|
+
* ```
|
|
16
|
+
*/
|
|
17
|
+
export function getVersion(): string;
|
|
18
|
+
/**
|
|
19
|
+
* A tokenizer for morphological analysis.
|
|
20
|
+
*
|
|
21
|
+
* The `Tokenizer` performs text tokenization based on the configuration
|
|
22
|
+
* provided by [`TokenizerBuilder`].
|
|
23
|
+
*
|
|
24
|
+
* # Examples
|
|
25
|
+
*
|
|
26
|
+
* ```javascript
|
|
27
|
+
* const builder = new TokenizerBuilder();
|
|
28
|
+
* builder.setDictionary("embedded://ipadic");
|
|
29
|
+
* builder.setMode("normal");
|
|
30
|
+
*
|
|
31
|
+
* const tokenizer = builder.build();
|
|
32
|
+
* const tokens = tokenizer.tokenize("関西国際空港");
|
|
33
|
+
* console.log(tokens);
|
|
34
|
+
* // Output: [
|
|
35
|
+
* // { surface: "関西国際空港", ... },
|
|
36
|
+
* // ...
|
|
37
|
+
* // ]
|
|
38
|
+
* ```
|
|
39
|
+
*/
|
|
40
|
+
export class Tokenizer {
|
|
41
|
+
private constructor();
|
|
42
|
+
free(): void;
|
|
43
|
+
[Symbol.dispose](): void;
|
|
44
|
+
/**
|
|
45
|
+
* Tokenizes the input text.
|
|
46
|
+
*
|
|
47
|
+
* Analyzes the input text and returns an array of token objects. Each token
|
|
48
|
+
* contains information such as surface form, part-of-speech tags, reading, etc.
|
|
49
|
+
* Field names in the returned objects are in camelCase.
|
|
50
|
+
*
|
|
51
|
+
* # Parameters
|
|
52
|
+
*
|
|
53
|
+
* - `input_text`: The text to tokenize.
|
|
54
|
+
*
|
|
55
|
+
* # Returns
|
|
56
|
+
*
|
|
57
|
+
* A JavaScript array of token objects. Each token object contains:
|
|
58
|
+
* - `surface`: The surface form of the token
|
|
59
|
+
* - `pos`: Part-of-speech tags
|
|
60
|
+
* - Additional language-specific fields
|
|
61
|
+
*
|
|
62
|
+
* # Errors
|
|
63
|
+
*
|
|
64
|
+
* Returns an error if tokenization fails.
|
|
65
|
+
*
|
|
66
|
+
* # Examples
|
|
67
|
+
*
|
|
68
|
+
* ```javascript
|
|
69
|
+
* const tokens = tokenizer.tokenize("東京都に行く");
|
|
70
|
+
* tokens.forEach(token => {
|
|
71
|
+
* console.log(token.surface, token.pos);
|
|
72
|
+
* });
|
|
73
|
+
* ```
|
|
74
|
+
*/
|
|
75
|
+
tokenize(input_text: string): any;
|
|
76
|
+
}
|
|
77
|
+
/**
|
|
78
|
+
* Builder for creating a [`Tokenizer`] instance.
|
|
79
|
+
*
|
|
80
|
+
* `TokenizerBuilder` provides a fluent API for configuring and building a tokenizer
|
|
81
|
+
* with various options such as dictionary selection, tokenization mode, character filters,
|
|
82
|
+
* and token filters.
|
|
83
|
+
*
|
|
84
|
+
* # Examples
|
|
85
|
+
*
|
|
86
|
+
* ```javascript
|
|
87
|
+
* const builder = new TokenizerBuilder();
|
|
88
|
+
* builder.setDictionary("embedded://ipadic");
|
|
89
|
+
* builder.setMode("normal");
|
|
90
|
+
* builder.setKeepWhitespace(false);
|
|
91
|
+
* builder.appendCharacterFilter("unicode_normalize", { "kind": "nfkc" });
|
|
92
|
+
* builder.appendTokenFilter("lowercase");
|
|
93
|
+
*
|
|
94
|
+
* const tokenizer = builder.build();
|
|
95
|
+
* ```
|
|
96
|
+
*/
|
|
97
|
+
export class TokenizerBuilder {
|
|
98
|
+
free(): void;
|
|
99
|
+
[Symbol.dispose](): void;
|
|
100
|
+
/**
|
|
101
|
+
* Creates a new `TokenizerBuilder` instance.
|
|
102
|
+
*
|
|
103
|
+
* # Returns
|
|
104
|
+
*
|
|
105
|
+
* A new `TokenizerBuilder` instance.
|
|
106
|
+
*
|
|
107
|
+
* # Errors
|
|
108
|
+
*
|
|
109
|
+
* Returns an error if the builder cannot be initialized.
|
|
110
|
+
*
|
|
111
|
+
* # Examples
|
|
112
|
+
*
|
|
113
|
+
* ```javascript
|
|
114
|
+
* const builder = new TokenizerBuilder();
|
|
115
|
+
* ```
|
|
116
|
+
*/
|
|
117
|
+
constructor();
|
|
118
|
+
/**
|
|
119
|
+
* Builds and returns a configured [`Tokenizer`] instance.
|
|
120
|
+
*
|
|
121
|
+
* This method consumes the builder and creates the final tokenizer with all
|
|
122
|
+
* configured settings.
|
|
123
|
+
*
|
|
124
|
+
* # Returns
|
|
125
|
+
*
|
|
126
|
+
* A configured `Tokenizer` instance.
|
|
127
|
+
*
|
|
128
|
+
* # Errors
|
|
129
|
+
*
|
|
130
|
+
* Returns an error if the tokenizer cannot be built with the current configuration.
|
|
131
|
+
*
|
|
132
|
+
* # Examples
|
|
133
|
+
*
|
|
134
|
+
* ```javascript
|
|
135
|
+
* const builder = new TokenizerBuilder();
|
|
136
|
+
* builder.setDictionary("embedded://ipadic");
|
|
137
|
+
* const tokenizer = builder.build();
|
|
138
|
+
* ```
|
|
139
|
+
*/
|
|
140
|
+
build(): Tokenizer;
|
|
141
|
+
/**
|
|
142
|
+
* Sets the tokenization mode.
|
|
143
|
+
*
|
|
144
|
+
* # Parameters
|
|
145
|
+
*
|
|
146
|
+
* - `mode`: The tokenization mode. Valid values are:
|
|
147
|
+
* - `"normal"`: Standard tokenization
|
|
148
|
+
* - `"decompose"`: Decomposes compound words into their components
|
|
149
|
+
*
|
|
150
|
+
* # Errors
|
|
151
|
+
*
|
|
152
|
+
* Returns an error if the mode string is invalid.
|
|
153
|
+
*
|
|
154
|
+
* # Examples
|
|
155
|
+
*
|
|
156
|
+
* ```javascript
|
|
157
|
+
* builder.setMode("normal");
|
|
158
|
+
* // or
|
|
159
|
+
* builder.setMode("decompose");
|
|
160
|
+
* ```
|
|
161
|
+
*/
|
|
162
|
+
setMode(mode: string): void;
|
|
163
|
+
/**
|
|
164
|
+
* Sets the dictionary to use for tokenization.
|
|
165
|
+
*
|
|
166
|
+
* # Parameters
|
|
167
|
+
*
|
|
168
|
+
* - `uri`: The dictionary URI. Valid embedded dictionaries are:
|
|
169
|
+
* - `"embedded://ipadic"`: Japanese IPADIC dictionary
|
|
170
|
+
* - `"embedded://unidic"`: Japanese UniDic dictionary
|
|
171
|
+
* - `"embedded://ko-dic"`: Korean ko-dic dictionary
|
|
172
|
+
* - `"embedded://cc-cedict"`: Chinese CC-CEDICT dictionary
|
|
173
|
+
*
|
|
174
|
+
* # Examples
|
|
175
|
+
*
|
|
176
|
+
* ```javascript
|
|
177
|
+
* builder.setDictionary("embedded://ipadic");
|
|
178
|
+
* ```
|
|
179
|
+
*/
|
|
180
|
+
setDictionary(uri: string): void;
|
|
181
|
+
/**
|
|
182
|
+
* Sets a user-defined dictionary.
|
|
183
|
+
*
|
|
184
|
+
* User dictionaries allow you to add custom words and their properties
|
|
185
|
+
* to supplement the main dictionary.
|
|
186
|
+
*
|
|
187
|
+
* # Parameters
|
|
188
|
+
*
|
|
189
|
+
* - `uri`: The URI to the user dictionary file.
|
|
190
|
+
*
|
|
191
|
+
* # Examples
|
|
192
|
+
*
|
|
193
|
+
* ```javascript
|
|
194
|
+
* builder.setUserDictionary("path/to/user_dict.csv");
|
|
195
|
+
* ```
|
|
196
|
+
*/
|
|
197
|
+
setUserDictionary(uri: string): void;
|
|
198
|
+
/**
|
|
199
|
+
* Sets whether to keep whitespace tokens in the output.
|
|
200
|
+
*
|
|
201
|
+
* # Parameters
|
|
202
|
+
*
|
|
203
|
+
* - `keep`: If `true`, whitespace tokens are preserved; if `false`, they are removed.
|
|
204
|
+
*
|
|
205
|
+
* # Examples
|
|
206
|
+
*
|
|
207
|
+
* ```javascript
|
|
208
|
+
* builder.setKeepWhitespace(false); // Remove whitespace tokens
|
|
209
|
+
* // or
|
|
210
|
+
* builder.setKeepWhitespace(true); // Keep whitespace tokens
|
|
211
|
+
* ```
|
|
212
|
+
*/
|
|
213
|
+
setKeepWhitespace(keep: boolean): void;
|
|
214
|
+
/**
|
|
215
|
+
* Appends a character filter to the tokenization pipeline.
|
|
216
|
+
*
|
|
217
|
+
* Character filters transform the input text before tokenization.
|
|
218
|
+
*
|
|
219
|
+
* # Parameters
|
|
220
|
+
*
|
|
221
|
+
* - `name`: The name of the character filter (e.g., `"unicode_normalize"`).
|
|
222
|
+
* - `args`: A JavaScript object containing filter-specific arguments.
|
|
223
|
+
*
|
|
224
|
+
* # Errors
|
|
225
|
+
*
|
|
226
|
+
* Returns an error if the arguments cannot be parsed.
|
|
227
|
+
*
|
|
228
|
+
* # Examples
|
|
229
|
+
*
|
|
230
|
+
* ```javascript
|
|
231
|
+
* builder.appendCharacterFilter("unicode_normalize", { "kind": "nfkc" });
|
|
232
|
+
* ```
|
|
233
|
+
*/
|
|
234
|
+
appendCharacterFilter(name: string, args: any): void;
|
|
235
|
+
/**
|
|
236
|
+
* Appends a token filter to the tokenization pipeline.
|
|
237
|
+
*
|
|
238
|
+
* Token filters transform or filter the tokens after tokenization.
|
|
239
|
+
*
|
|
240
|
+
* # Parameters
|
|
241
|
+
*
|
|
242
|
+
* - `name`: The name of the token filter (e.g., `"lowercase"`, `"japanese_number"`).
|
|
243
|
+
* - `args`: A JavaScript object containing filter-specific arguments.
|
|
244
|
+
*
|
|
245
|
+
* # Errors
|
|
246
|
+
*
|
|
247
|
+
* Returns an error if the arguments cannot be parsed.
|
|
248
|
+
*
|
|
249
|
+
* # Examples
|
|
250
|
+
*
|
|
251
|
+
* ```javascript
|
|
252
|
+
* builder.appendTokenFilter("lowercase");
|
|
253
|
+
* builder.appendTokenFilter("japanese_number", { "tags": ["名詞,数"] });
|
|
254
|
+
* ```
|
|
255
|
+
*/
|
|
256
|
+
appendTokenFilter(name: string, args: any): void;
|
|
257
|
+
}
|
package/lindera_wasm.js
ADDED
|
@@ -0,0 +1,820 @@
|
|
|
1
|
+
|
|
2
|
+
let imports = {};
|
|
3
|
+
imports['__wbindgen_placeholder__'] = module.exports;
|
|
4
|
+
|
|
5
|
+
let cachedUint8ArrayMemory0 = null;
|
|
6
|
+
|
|
7
|
+
function getUint8ArrayMemory0() {
|
|
8
|
+
if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) {
|
|
9
|
+
cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer);
|
|
10
|
+
}
|
|
11
|
+
return cachedUint8ArrayMemory0;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
let cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true });
|
|
15
|
+
|
|
16
|
+
cachedTextDecoder.decode();
|
|
17
|
+
|
|
18
|
+
function decodeText(ptr, len) {
|
|
19
|
+
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
function getStringFromWasm0(ptr, len) {
|
|
23
|
+
ptr = ptr >>> 0;
|
|
24
|
+
return decodeText(ptr, len);
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
let WASM_VECTOR_LEN = 0;
|
|
28
|
+
|
|
29
|
+
const cachedTextEncoder = new TextEncoder();
|
|
30
|
+
|
|
31
|
+
if (!('encodeInto' in cachedTextEncoder)) {
|
|
32
|
+
cachedTextEncoder.encodeInto = function (arg, view) {
|
|
33
|
+
const buf = cachedTextEncoder.encode(arg);
|
|
34
|
+
view.set(buf);
|
|
35
|
+
return {
|
|
36
|
+
read: arg.length,
|
|
37
|
+
written: buf.length
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
function passStringToWasm0(arg, malloc, realloc) {
|
|
43
|
+
|
|
44
|
+
if (realloc === undefined) {
|
|
45
|
+
const buf = cachedTextEncoder.encode(arg);
|
|
46
|
+
const ptr = malloc(buf.length, 1) >>> 0;
|
|
47
|
+
getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf);
|
|
48
|
+
WASM_VECTOR_LEN = buf.length;
|
|
49
|
+
return ptr;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
let len = arg.length;
|
|
53
|
+
let ptr = malloc(len, 1) >>> 0;
|
|
54
|
+
|
|
55
|
+
const mem = getUint8ArrayMemory0();
|
|
56
|
+
|
|
57
|
+
let offset = 0;
|
|
58
|
+
|
|
59
|
+
for (; offset < len; offset++) {
|
|
60
|
+
const code = arg.charCodeAt(offset);
|
|
61
|
+
if (code > 0x7F) break;
|
|
62
|
+
mem[ptr + offset] = code;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
if (offset !== len) {
|
|
66
|
+
if (offset !== 0) {
|
|
67
|
+
arg = arg.slice(offset);
|
|
68
|
+
}
|
|
69
|
+
ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
|
|
70
|
+
const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len);
|
|
71
|
+
const ret = cachedTextEncoder.encodeInto(arg, view);
|
|
72
|
+
|
|
73
|
+
offset += ret.written;
|
|
74
|
+
ptr = realloc(ptr, len, offset, 1) >>> 0;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
WASM_VECTOR_LEN = offset;
|
|
78
|
+
return ptr;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
let cachedDataViewMemory0 = null;
|
|
82
|
+
|
|
83
|
+
function getDataViewMemory0() {
|
|
84
|
+
if (cachedDataViewMemory0 === null || cachedDataViewMemory0.buffer.detached === true || (cachedDataViewMemory0.buffer.detached === undefined && cachedDataViewMemory0.buffer !== wasm.memory.buffer)) {
|
|
85
|
+
cachedDataViewMemory0 = new DataView(wasm.memory.buffer);
|
|
86
|
+
}
|
|
87
|
+
return cachedDataViewMemory0;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
function addToExternrefTable0(obj) {
|
|
91
|
+
const idx = wasm.__externref_table_alloc();
|
|
92
|
+
wasm.__wbindgen_export_4.set(idx, obj);
|
|
93
|
+
return idx;
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
function handleError(f, args) {
|
|
97
|
+
try {
|
|
98
|
+
return f.apply(this, args);
|
|
99
|
+
} catch (e) {
|
|
100
|
+
const idx = addToExternrefTable0(e);
|
|
101
|
+
wasm.__wbindgen_exn_store(idx);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
function getArrayU8FromWasm0(ptr, len) {
|
|
106
|
+
ptr = ptr >>> 0;
|
|
107
|
+
return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
function isLikeNone(x) {
|
|
111
|
+
return x === undefined || x === null;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
function debugString(val) {
|
|
115
|
+
// primitive types
|
|
116
|
+
const type = typeof val;
|
|
117
|
+
if (type == 'number' || type == 'boolean' || val == null) {
|
|
118
|
+
return `${val}`;
|
|
119
|
+
}
|
|
120
|
+
if (type == 'string') {
|
|
121
|
+
return `"${val}"`;
|
|
122
|
+
}
|
|
123
|
+
if (type == 'symbol') {
|
|
124
|
+
const description = val.description;
|
|
125
|
+
if (description == null) {
|
|
126
|
+
return 'Symbol';
|
|
127
|
+
} else {
|
|
128
|
+
return `Symbol(${description})`;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
if (type == 'function') {
|
|
132
|
+
const name = val.name;
|
|
133
|
+
if (typeof name == 'string' && name.length > 0) {
|
|
134
|
+
return `Function(${name})`;
|
|
135
|
+
} else {
|
|
136
|
+
return 'Function';
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
// objects
|
|
140
|
+
if (Array.isArray(val)) {
|
|
141
|
+
const length = val.length;
|
|
142
|
+
let debug = '[';
|
|
143
|
+
if (length > 0) {
|
|
144
|
+
debug += debugString(val[0]);
|
|
145
|
+
}
|
|
146
|
+
for(let i = 1; i < length; i++) {
|
|
147
|
+
debug += ', ' + debugString(val[i]);
|
|
148
|
+
}
|
|
149
|
+
debug += ']';
|
|
150
|
+
return debug;
|
|
151
|
+
}
|
|
152
|
+
// Test for built-in
|
|
153
|
+
const builtInMatches = /\[object ([^\]]+)\]/.exec(toString.call(val));
|
|
154
|
+
let className;
|
|
155
|
+
if (builtInMatches && builtInMatches.length > 1) {
|
|
156
|
+
className = builtInMatches[1];
|
|
157
|
+
} else {
|
|
158
|
+
// Failed to match the standard '[object ClassName]'
|
|
159
|
+
return toString.call(val);
|
|
160
|
+
}
|
|
161
|
+
if (className == 'Object') {
|
|
162
|
+
// we're a user defined class or Object
|
|
163
|
+
// JSON.stringify avoids problems with cycles, and is generally much
|
|
164
|
+
// easier than looping through ownProperties of `val`.
|
|
165
|
+
try {
|
|
166
|
+
return 'Object(' + JSON.stringify(val) + ')';
|
|
167
|
+
} catch (_) {
|
|
168
|
+
return 'Object';
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
// errors
|
|
172
|
+
if (val instanceof Error) {
|
|
173
|
+
return `${val.name}: ${val.message}\n${val.stack}`;
|
|
174
|
+
}
|
|
175
|
+
// TODO we could test for more things here, like `Set`s and `Map`s.
|
|
176
|
+
return className;
|
|
177
|
+
}
|
|
178
|
+
/**
|
|
179
|
+
* Gets the version of the lindera-wasm library.
|
|
180
|
+
*
|
|
181
|
+
* # Returns
|
|
182
|
+
*
|
|
183
|
+
* The version string of the library (e.g., "1.0.0").
|
|
184
|
+
*
|
|
185
|
+
* # Examples
|
|
186
|
+
*
|
|
187
|
+
* ```javascript
|
|
188
|
+
* import { getVersion } from 'lindera-wasm';
|
|
189
|
+
* console.log(getVersion()); // "1.0.0"
|
|
190
|
+
* ```
|
|
191
|
+
* @returns {string}
|
|
192
|
+
*/
|
|
193
|
+
exports.getVersion = function() {
|
|
194
|
+
let deferred1_0;
|
|
195
|
+
let deferred1_1;
|
|
196
|
+
try {
|
|
197
|
+
const ret = wasm.getVersion();
|
|
198
|
+
deferred1_0 = ret[0];
|
|
199
|
+
deferred1_1 = ret[1];
|
|
200
|
+
return getStringFromWasm0(ret[0], ret[1]);
|
|
201
|
+
} finally {
|
|
202
|
+
wasm.__wbindgen_free(deferred1_0, deferred1_1, 1);
|
|
203
|
+
}
|
|
204
|
+
};
|
|
205
|
+
|
|
206
|
+
function takeFromExternrefTable0(idx) {
|
|
207
|
+
const value = wasm.__wbindgen_export_4.get(idx);
|
|
208
|
+
wasm.__externref_table_dealloc(idx);
|
|
209
|
+
return value;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
const TokenizerFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
213
|
+
? { register: () => {}, unregister: () => {} }
|
|
214
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_tokenizer_free(ptr >>> 0, 1));
|
|
215
|
+
/**
|
|
216
|
+
* A tokenizer for morphological analysis.
|
|
217
|
+
*
|
|
218
|
+
* The `Tokenizer` performs text tokenization based on the configuration
|
|
219
|
+
* provided by [`TokenizerBuilder`].
|
|
220
|
+
*
|
|
221
|
+
* # Examples
|
|
222
|
+
*
|
|
223
|
+
* ```javascript
|
|
224
|
+
* const builder = new TokenizerBuilder();
|
|
225
|
+
* builder.setDictionary("embedded://ipadic");
|
|
226
|
+
* builder.setMode("normal");
|
|
227
|
+
*
|
|
228
|
+
* const tokenizer = builder.build();
|
|
229
|
+
* const tokens = tokenizer.tokenize("関西国際空港");
|
|
230
|
+
* console.log(tokens);
|
|
231
|
+
* // Output: [
|
|
232
|
+
* // { surface: "関西国際空港", ... },
|
|
233
|
+
* // ...
|
|
234
|
+
* // ]
|
|
235
|
+
* ```
|
|
236
|
+
*/
|
|
237
|
+
class Tokenizer {
|
|
238
|
+
|
|
239
|
+
static __wrap(ptr) {
|
|
240
|
+
ptr = ptr >>> 0;
|
|
241
|
+
const obj = Object.create(Tokenizer.prototype);
|
|
242
|
+
obj.__wbg_ptr = ptr;
|
|
243
|
+
TokenizerFinalization.register(obj, obj.__wbg_ptr, obj);
|
|
244
|
+
return obj;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
__destroy_into_raw() {
|
|
248
|
+
const ptr = this.__wbg_ptr;
|
|
249
|
+
this.__wbg_ptr = 0;
|
|
250
|
+
TokenizerFinalization.unregister(this);
|
|
251
|
+
return ptr;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
free() {
|
|
255
|
+
const ptr = this.__destroy_into_raw();
|
|
256
|
+
wasm.__wbg_tokenizer_free(ptr, 0);
|
|
257
|
+
}
|
|
258
|
+
/**
|
|
259
|
+
* Tokenizes the input text.
|
|
260
|
+
*
|
|
261
|
+
* Analyzes the input text and returns an array of token objects. Each token
|
|
262
|
+
* contains information such as surface form, part-of-speech tags, reading, etc.
|
|
263
|
+
* Field names in the returned objects are in camelCase.
|
|
264
|
+
*
|
|
265
|
+
* # Parameters
|
|
266
|
+
*
|
|
267
|
+
* - `input_text`: The text to tokenize.
|
|
268
|
+
*
|
|
269
|
+
* # Returns
|
|
270
|
+
*
|
|
271
|
+
* A JavaScript array of token objects. Each token object contains:
|
|
272
|
+
* - `surface`: The surface form of the token
|
|
273
|
+
* - `pos`: Part-of-speech tags
|
|
274
|
+
* - Additional language-specific fields
|
|
275
|
+
*
|
|
276
|
+
* # Errors
|
|
277
|
+
*
|
|
278
|
+
* Returns an error if tokenization fails.
|
|
279
|
+
*
|
|
280
|
+
* # Examples
|
|
281
|
+
*
|
|
282
|
+
* ```javascript
|
|
283
|
+
* const tokens = tokenizer.tokenize("東京都に行く");
|
|
284
|
+
* tokens.forEach(token => {
|
|
285
|
+
* console.log(token.surface, token.pos);
|
|
286
|
+
* });
|
|
287
|
+
* ```
|
|
288
|
+
* @param {string} input_text
|
|
289
|
+
* @returns {any}
|
|
290
|
+
*/
|
|
291
|
+
tokenize(input_text) {
|
|
292
|
+
const ptr0 = passStringToWasm0(input_text, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
293
|
+
const len0 = WASM_VECTOR_LEN;
|
|
294
|
+
const ret = wasm.tokenizer_tokenize(this.__wbg_ptr, ptr0, len0);
|
|
295
|
+
if (ret[2]) {
|
|
296
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
297
|
+
}
|
|
298
|
+
return takeFromExternrefTable0(ret[0]);
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
if (Symbol.dispose) Tokenizer.prototype[Symbol.dispose] = Tokenizer.prototype.free;
|
|
302
|
+
|
|
303
|
+
exports.Tokenizer = Tokenizer;
|
|
304
|
+
|
|
305
|
+
const TokenizerBuilderFinalization = (typeof FinalizationRegistry === 'undefined')
|
|
306
|
+
? { register: () => {}, unregister: () => {} }
|
|
307
|
+
: new FinalizationRegistry(ptr => wasm.__wbg_tokenizerbuilder_free(ptr >>> 0, 1));
|
|
308
|
+
/**
|
|
309
|
+
* Builder for creating a [`Tokenizer`] instance.
|
|
310
|
+
*
|
|
311
|
+
* `TokenizerBuilder` provides a fluent API for configuring and building a tokenizer
|
|
312
|
+
* with various options such as dictionary selection, tokenization mode, character filters,
|
|
313
|
+
* and token filters.
|
|
314
|
+
*
|
|
315
|
+
* # Examples
|
|
316
|
+
*
|
|
317
|
+
* ```javascript
|
|
318
|
+
* const builder = new TokenizerBuilder();
|
|
319
|
+
* builder.setDictionary("embedded://ipadic");
|
|
320
|
+
* builder.setMode("normal");
|
|
321
|
+
* builder.setKeepWhitespace(false);
|
|
322
|
+
* builder.appendCharacterFilter("unicode_normalize", { "kind": "nfkc" });
|
|
323
|
+
* builder.appendTokenFilter("lowercase");
|
|
324
|
+
*
|
|
325
|
+
* const tokenizer = builder.build();
|
|
326
|
+
* ```
|
|
327
|
+
*/
|
|
328
|
+
class TokenizerBuilder {
|
|
329
|
+
|
|
330
|
+
__destroy_into_raw() {
|
|
331
|
+
const ptr = this.__wbg_ptr;
|
|
332
|
+
this.__wbg_ptr = 0;
|
|
333
|
+
TokenizerBuilderFinalization.unregister(this);
|
|
334
|
+
return ptr;
|
|
335
|
+
}
|
|
336
|
+
|
|
337
|
+
free() {
|
|
338
|
+
const ptr = this.__destroy_into_raw();
|
|
339
|
+
wasm.__wbg_tokenizerbuilder_free(ptr, 0);
|
|
340
|
+
}
|
|
341
|
+
/**
|
|
342
|
+
* Creates a new `TokenizerBuilder` instance.
|
|
343
|
+
*
|
|
344
|
+
* # Returns
|
|
345
|
+
*
|
|
346
|
+
* A new `TokenizerBuilder` instance.
|
|
347
|
+
*
|
|
348
|
+
* # Errors
|
|
349
|
+
*
|
|
350
|
+
* Returns an error if the builder cannot be initialized.
|
|
351
|
+
*
|
|
352
|
+
* # Examples
|
|
353
|
+
*
|
|
354
|
+
* ```javascript
|
|
355
|
+
* const builder = new TokenizerBuilder();
|
|
356
|
+
* ```
|
|
357
|
+
*/
|
|
358
|
+
constructor() {
|
|
359
|
+
const ret = wasm.tokenizerbuilder_new();
|
|
360
|
+
if (ret[2]) {
|
|
361
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
362
|
+
}
|
|
363
|
+
this.__wbg_ptr = ret[0] >>> 0;
|
|
364
|
+
TokenizerBuilderFinalization.register(this, this.__wbg_ptr, this);
|
|
365
|
+
return this;
|
|
366
|
+
}
|
|
367
|
+
/**
|
|
368
|
+
* Builds and returns a configured [`Tokenizer`] instance.
|
|
369
|
+
*
|
|
370
|
+
* This method consumes the builder and creates the final tokenizer with all
|
|
371
|
+
* configured settings.
|
|
372
|
+
*
|
|
373
|
+
* # Returns
|
|
374
|
+
*
|
|
375
|
+
* A configured `Tokenizer` instance.
|
|
376
|
+
*
|
|
377
|
+
* # Errors
|
|
378
|
+
*
|
|
379
|
+
* Returns an error if the tokenizer cannot be built with the current configuration.
|
|
380
|
+
*
|
|
381
|
+
* # Examples
|
|
382
|
+
*
|
|
383
|
+
* ```javascript
|
|
384
|
+
* const builder = new TokenizerBuilder();
|
|
385
|
+
* builder.setDictionary("embedded://ipadic");
|
|
386
|
+
* const tokenizer = builder.build();
|
|
387
|
+
* ```
|
|
388
|
+
* @returns {Tokenizer}
|
|
389
|
+
*/
|
|
390
|
+
build() {
|
|
391
|
+
const ptr = this.__destroy_into_raw();
|
|
392
|
+
const ret = wasm.tokenizerbuilder_build(ptr);
|
|
393
|
+
if (ret[2]) {
|
|
394
|
+
throw takeFromExternrefTable0(ret[1]);
|
|
395
|
+
}
|
|
396
|
+
return Tokenizer.__wrap(ret[0]);
|
|
397
|
+
}
|
|
398
|
+
/**
|
|
399
|
+
* Sets the tokenization mode.
|
|
400
|
+
*
|
|
401
|
+
* # Parameters
|
|
402
|
+
*
|
|
403
|
+
* - `mode`: The tokenization mode. Valid values are:
|
|
404
|
+
* - `"normal"`: Standard tokenization
|
|
405
|
+
* - `"decompose"`: Decomposes compound words into their components
|
|
406
|
+
*
|
|
407
|
+
* # Errors
|
|
408
|
+
*
|
|
409
|
+
* Returns an error if the mode string is invalid.
|
|
410
|
+
*
|
|
411
|
+
* # Examples
|
|
412
|
+
*
|
|
413
|
+
* ```javascript
|
|
414
|
+
* builder.setMode("normal");
|
|
415
|
+
* // or
|
|
416
|
+
* builder.setMode("decompose");
|
|
417
|
+
* ```
|
|
418
|
+
* @param {string} mode
|
|
419
|
+
*/
|
|
420
|
+
setMode(mode) {
|
|
421
|
+
const ptr0 = passStringToWasm0(mode, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
422
|
+
const len0 = WASM_VECTOR_LEN;
|
|
423
|
+
const ret = wasm.tokenizerbuilder_setMode(this.__wbg_ptr, ptr0, len0);
|
|
424
|
+
if (ret[1]) {
|
|
425
|
+
throw takeFromExternrefTable0(ret[0]);
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
/**
|
|
429
|
+
* Sets the dictionary to use for tokenization.
|
|
430
|
+
*
|
|
431
|
+
* # Parameters
|
|
432
|
+
*
|
|
433
|
+
* - `uri`: The dictionary URI. Valid embedded dictionaries are:
|
|
434
|
+
* - `"embedded://ipadic"`: Japanese IPADIC dictionary
|
|
435
|
+
* - `"embedded://unidic"`: Japanese UniDic dictionary
|
|
436
|
+
* - `"embedded://ko-dic"`: Korean ko-dic dictionary
|
|
437
|
+
* - `"embedded://cc-cedict"`: Chinese CC-CEDICT dictionary
|
|
438
|
+
*
|
|
439
|
+
* # Examples
|
|
440
|
+
*
|
|
441
|
+
* ```javascript
|
|
442
|
+
* builder.setDictionary("embedded://ipadic");
|
|
443
|
+
* ```
|
|
444
|
+
* @param {string} uri
|
|
445
|
+
*/
|
|
446
|
+
setDictionary(uri) {
|
|
447
|
+
const ptr0 = passStringToWasm0(uri, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
448
|
+
const len0 = WASM_VECTOR_LEN;
|
|
449
|
+
const ret = wasm.tokenizerbuilder_setDictionary(this.__wbg_ptr, ptr0, len0);
|
|
450
|
+
if (ret[1]) {
|
|
451
|
+
throw takeFromExternrefTable0(ret[0]);
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
/**
|
|
455
|
+
* Sets a user-defined dictionary.
|
|
456
|
+
*
|
|
457
|
+
* User dictionaries allow you to add custom words and their properties
|
|
458
|
+
* to supplement the main dictionary.
|
|
459
|
+
*
|
|
460
|
+
* # Parameters
|
|
461
|
+
*
|
|
462
|
+
* - `uri`: The URI to the user dictionary file.
|
|
463
|
+
*
|
|
464
|
+
* # Examples
|
|
465
|
+
*
|
|
466
|
+
* ```javascript
|
|
467
|
+
* builder.setUserDictionary("path/to/user_dict.csv");
|
|
468
|
+
* ```
|
|
469
|
+
* @param {string} uri
|
|
470
|
+
*/
|
|
471
|
+
setUserDictionary(uri) {
|
|
472
|
+
const ptr0 = passStringToWasm0(uri, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
473
|
+
const len0 = WASM_VECTOR_LEN;
|
|
474
|
+
const ret = wasm.tokenizerbuilder_setUserDictionary(this.__wbg_ptr, ptr0, len0);
|
|
475
|
+
if (ret[1]) {
|
|
476
|
+
throw takeFromExternrefTable0(ret[0]);
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
/**
|
|
480
|
+
* Sets whether to keep whitespace tokens in the output.
|
|
481
|
+
*
|
|
482
|
+
* # Parameters
|
|
483
|
+
*
|
|
484
|
+
* - `keep`: If `true`, whitespace tokens are preserved; if `false`, they are removed.
|
|
485
|
+
*
|
|
486
|
+
* # Examples
|
|
487
|
+
*
|
|
488
|
+
* ```javascript
|
|
489
|
+
* builder.setKeepWhitespace(false); // Remove whitespace tokens
|
|
490
|
+
* // or
|
|
491
|
+
* builder.setKeepWhitespace(true); // Keep whitespace tokens
|
|
492
|
+
* ```
|
|
493
|
+
* @param {boolean} keep
|
|
494
|
+
*/
|
|
495
|
+
setKeepWhitespace(keep) {
|
|
496
|
+
const ret = wasm.tokenizerbuilder_setKeepWhitespace(this.__wbg_ptr, keep);
|
|
497
|
+
if (ret[1]) {
|
|
498
|
+
throw takeFromExternrefTable0(ret[0]);
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
/**
|
|
502
|
+
* Appends a character filter to the tokenization pipeline.
|
|
503
|
+
*
|
|
504
|
+
* Character filters transform the input text before tokenization.
|
|
505
|
+
*
|
|
506
|
+
* # Parameters
|
|
507
|
+
*
|
|
508
|
+
* - `name`: The name of the character filter (e.g., `"unicode_normalize"`).
|
|
509
|
+
* - `args`: A JavaScript object containing filter-specific arguments.
|
|
510
|
+
*
|
|
511
|
+
* # Errors
|
|
512
|
+
*
|
|
513
|
+
* Returns an error if the arguments cannot be parsed.
|
|
514
|
+
*
|
|
515
|
+
* # Examples
|
|
516
|
+
*
|
|
517
|
+
* ```javascript
|
|
518
|
+
* builder.appendCharacterFilter("unicode_normalize", { "kind": "nfkc" });
|
|
519
|
+
* ```
|
|
520
|
+
* @param {string} name
|
|
521
|
+
* @param {any} args
|
|
522
|
+
*/
|
|
523
|
+
appendCharacterFilter(name, args) {
|
|
524
|
+
const ptr0 = passStringToWasm0(name, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
525
|
+
const len0 = WASM_VECTOR_LEN;
|
|
526
|
+
const ret = wasm.tokenizerbuilder_appendCharacterFilter(this.__wbg_ptr, ptr0, len0, args);
|
|
527
|
+
if (ret[1]) {
|
|
528
|
+
throw takeFromExternrefTable0(ret[0]);
|
|
529
|
+
}
|
|
530
|
+
}
|
|
531
|
+
/**
|
|
532
|
+
* Appends a token filter to the tokenization pipeline.
|
|
533
|
+
*
|
|
534
|
+
* Token filters transform or filter the tokens after tokenization.
|
|
535
|
+
*
|
|
536
|
+
* # Parameters
|
|
537
|
+
*
|
|
538
|
+
* - `name`: The name of the token filter (e.g., `"lowercase"`, `"japanese_number"`).
|
|
539
|
+
* - `args`: A JavaScript object containing filter-specific arguments.
|
|
540
|
+
*
|
|
541
|
+
* # Errors
|
|
542
|
+
*
|
|
543
|
+
* Returns an error if the arguments cannot be parsed.
|
|
544
|
+
*
|
|
545
|
+
* # Examples
|
|
546
|
+
*
|
|
547
|
+
* ```javascript
|
|
548
|
+
* builder.appendTokenFilter("lowercase");
|
|
549
|
+
* builder.appendTokenFilter("japanese_number", { "tags": ["名詞,数"] });
|
|
550
|
+
* ```
|
|
551
|
+
* @param {string} name
|
|
552
|
+
* @param {any} args
|
|
553
|
+
*/
|
|
554
|
+
appendTokenFilter(name, args) {
|
|
555
|
+
const ptr0 = passStringToWasm0(name, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
|
|
556
|
+
const len0 = WASM_VECTOR_LEN;
|
|
557
|
+
const ret = wasm.tokenizerbuilder_appendTokenFilter(this.__wbg_ptr, ptr0, len0, args);
|
|
558
|
+
if (ret[1]) {
|
|
559
|
+
throw takeFromExternrefTable0(ret[0]);
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
// Integrate with the explicit-resource-management proposal: when the host
// supports `Symbol.dispose` (the `using` statement), disposing a
// TokenizerBuilder releases its wasm-side memory via free().
if (Symbol.dispose) TokenizerBuilder.prototype[Symbol.dispose] = TokenizerBuilder.prototype.free;

exports.TokenizerBuilder = TokenizerBuilder;
|
|
566
|
+
|
|
567
|
+
// ---- wasm-bindgen imported glue -------------------------------------------
// The `exports.__wbg_*` functions below are generated bindings: the wasm
// module calls them through its import table. The hashed names must stay in
// sync with the accompanying .wasm binary — do not rename them.

// Builds a JS Error whose message is read from wasm memory at (arg0, arg1).
exports.__wbg_Error_e17e777aac105295 = function(arg0, arg1) {
    const ret = Error(getStringFromWasm0(arg0, arg1));
    return ret;
};

// Stringifies `arg1` and writes the resulting (ptr, len) pair into wasm
// memory at `arg0` (ptr at byte offset 0, len at byte offset 4, little-endian).
exports.__wbg_String_8f0eb39a4a4c2f66 = function(arg0, arg1) {
    const ret = String(arg1);
    const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const len1 = WASM_VECTOR_LEN;
    getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
    getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
};

// Calls `arg0` as a function with `this = arg1`; `handleError` routes any
// thrown exception back to the wasm side instead of unwinding JS.
exports.__wbg_call_13410aac570ffff7 = function() { return handleError(function (arg0, arg1) {
    const ret = arg0.call(arg1);
    return ret;
}, arguments) };

// Reads the `done` flag from an iterator result object.
exports.__wbg_done_75ed0ee6dd243d9d = function(arg0) {
    const ret = arg0.done;
    return ret;
};

// Object.entries(arg0): [key, value] pairs of an object's own properties.
exports.__wbg_entries_2be2f15bd5554996 = function(arg0) {
    const ret = Object.entries(arg0);
    return ret;
};

// Indexed access: arg0[arg1], with arg1 coerced to an unsigned 32-bit index.
exports.__wbg_get_0da715ceaecea5c8 = function(arg0, arg1) {
    const ret = arg0[arg1 >>> 0];
    return ret;
};

// Reflect.get(arg0, arg1); wrapped with handleError because it can throw
// (e.g. when arg0 is not an object).
exports.__wbg_get_458e874b43b18b25 = function() { return handleError(function (arg0, arg1) {
    const ret = Reflect.get(arg0, arg1);
    return ret;
}, arguments) };
|
|
604
|
+
|
|
605
|
+
exports.__wbg_instanceof_ArrayBuffer_67f3012529f6a2dd = function(arg0) {
|
|
606
|
+
let result;
|
|
607
|
+
try {
|
|
608
|
+
result = arg0 instanceof ArrayBuffer;
|
|
609
|
+
} catch (_) {
|
|
610
|
+
result = false;
|
|
611
|
+
}
|
|
612
|
+
const ret = result;
|
|
613
|
+
return ret;
|
|
614
|
+
};
|
|
615
|
+
|
|
616
|
+
exports.__wbg_instanceof_Map_ebb01a5b6b5ffd0b = function(arg0) {
|
|
617
|
+
let result;
|
|
618
|
+
try {
|
|
619
|
+
result = arg0 instanceof Map;
|
|
620
|
+
} catch (_) {
|
|
621
|
+
result = false;
|
|
622
|
+
}
|
|
623
|
+
const ret = result;
|
|
624
|
+
return ret;
|
|
625
|
+
};
|
|
626
|
+
|
|
627
|
+
exports.__wbg_instanceof_Uint8Array_9a8378d955933db7 = function(arg0) {
|
|
628
|
+
let result;
|
|
629
|
+
try {
|
|
630
|
+
result = arg0 instanceof Uint8Array;
|
|
631
|
+
} catch (_) {
|
|
632
|
+
result = false;
|
|
633
|
+
}
|
|
634
|
+
const ret = result;
|
|
635
|
+
return ret;
|
|
636
|
+
};
|
|
637
|
+
|
|
638
|
+
// Array.isArray probe.
exports.__wbg_isArray_030cce220591fb41 = function(arg0) {
    const ret = Array.isArray(arg0);
    return ret;
};

// Number.isSafeInteger probe.
exports.__wbg_isSafeInteger_1c0d1af5542e102a = function(arg0) {
    const ret = Number.isSafeInteger(arg0);
    return ret;
};

// Returns the well-known Symbol.iterator value.
exports.__wbg_iterator_f370b34483c71a1c = function() {
    const ret = Symbol.iterator;
    return ret;
};

// `.length` accessor (one of two hashed variants of the same operation).
exports.__wbg_length_186546c51cd61acd = function(arg0) {
    const ret = arg0.length;
    return ret;
};

// Second hashed variant of the `.length` accessor.
exports.__wbg_length_6bb7e81f9d7713e4 = function(arg0) {
    const ret = arg0.length;
    return ret;
};

// Allocates an empty plain object.
exports.__wbg_new_19c25a3f2fa63a02 = function() {
    const ret = new Object();
    return ret;
};

// Allocates an empty array.
exports.__wbg_new_1f3a344cf3123716 = function() {
    const ret = new Array();
    return ret;
};

// Constructs a Uint8Array from `arg0` (e.g. an ArrayBuffer or array-like).
exports.__wbg_new_638ebfaedbf32a5e = function(arg0) {
    const ret = new Uint8Array(arg0);
    return ret;
};

// Reads the `next` method off an iterator without invoking it.
exports.__wbg_next_5b3530e612fde77d = function(arg0) {
    const ret = arg0.next;
    return ret;
};

// Advances an iterator; handleError forwards a throwing `next()` to wasm.
exports.__wbg_next_692e82279131b03c = function() { return handleError(function (arg0) {
    const ret = arg0.next();
    return ret;
}, arguments) };
|
|
687
|
+
|
|
688
|
+
// Copies the contents of the array `arg2` into wasm linear memory at
// (arg0, arg1), going through the canonical Uint8Array.prototype.set so a
// patched instance method cannot interfere.
exports.__wbg_prototypesetcall_3d4a26c1ed734349 = function(arg0, arg1, arg2) {
    Uint8Array.prototype.set.call(getArrayU8FromWasm0(arg0, arg1), arg2);
};

// Array.prototype.push; returns the array's new length.
exports.__wbg_push_330b2eb93e4e1212 = function(arg0, arg1) {
    const ret = arg0.push(arg1);
    return ret;
};

// Reflect.set(target, key, value); wrapped with handleError because it can throw.
exports.__wbg_set_453345bcda80b89a = function() { return handleError(function (arg0, arg1, arg2) {
    const ret = Reflect.set(arg0, arg1, arg2);
    return ret;
}, arguments) };

// Reads the `value` field of an iterator result object.
exports.__wbg_value_dd9372230531eade = function(arg0) {
    const ret = arg0.value;
    return ret;
};

// If `arg1` is a bigint, writes (flag=1, value as i64) into wasm memory at
// `arg0`; otherwise writes (flag=0, 0). Flag at byte offset 0, i64 at offset 8.
exports.__wbg_wbindgenbigintgetasi64_ac743ece6ab9bba1 = function(arg0, arg1) {
    const v = arg1;
    const ret = typeof(v) === 'bigint' ? v : undefined;
    getDataViewMemory0().setBigInt64(arg0 + 8 * 1, isLikeNone(ret) ? BigInt(0) : ret, true);
    getDataViewMemory0().setInt32(arg0 + 4 * 0, !isLikeNone(ret), true);
};

// Boolean probe: returns 1 for true, 0 for false, and the 0xFFFFFF sentinel
// when `arg0` is not a boolean at all.
exports.__wbg_wbindgenbooleanget_3fe6f642c7d97746 = function(arg0) {
    const v = arg0;
    const ret = typeof(v) === 'boolean' ? v : undefined;
    return isLikeNone(ret) ? 0xFFFFFF : ret ? 1 : 0;
};

// Produces a debug string for `arg1` and writes the resulting (ptr, len)
// pair into wasm memory at `arg0`.
exports.__wbg_wbindgendebugstring_99ef257a3ddda34d = function(arg0, arg1) {
    const ret = debugString(arg1);
    const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    const len1 = WASM_VECTOR_LEN;
    getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
    getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
};
|
|
727
|
+
|
|
728
|
+
// Property-membership test: `arg0 in arg1`.
exports.__wbg_wbindgenin_d7a1ee10933d2d55 = function(arg0, arg1) {
    const ret = arg0 in arg1;
    return ret;
};

// typeof === 'bigint' probe.
exports.__wbg_wbindgenisbigint_ecb90cc08a5a9154 = function(arg0) {
    const ret = typeof(arg0) === 'bigint';
    return ret;
};

// typeof === 'function' probe.
exports.__wbg_wbindgenisfunction_8cee7dce3725ae74 = function(arg0) {
    const ret = typeof(arg0) === 'function';
    return ret;
};

// Object probe; explicitly excludes null (typeof null is also 'object').
exports.__wbg_wbindgenisobject_307a53c6bd97fbf8 = function(arg0) {
    const val = arg0;
    const ret = typeof(val) === 'object' && val !== null;
    return ret;
};

// Strict equality (===) between two JS values.
exports.__wbg_wbindgenjsvaleq_e6f2ad59ccae1b58 = function(arg0, arg1) {
    const ret = arg0 === arg1;
    return ret;
};

// Loose equality (==) — coercion is intentional here.
exports.__wbg_wbindgenjsvallooseeq_9bec8c9be826bed1 = function(arg0, arg1) {
    const ret = arg0 == arg1;
    return ret;
};

// If `arg1` is a number, writes (flag=1, value as f64) into wasm memory at
// `arg0`; otherwise (flag=0, 0). Flag at byte offset 0, f64 at offset 8.
exports.__wbg_wbindgennumberget_f74b4c7525ac05cb = function(arg0, arg1) {
    const obj = arg1;
    const ret = typeof(obj) === 'number' ? obj : undefined;
    getDataViewMemory0().setFloat64(arg0 + 8 * 1, isLikeNone(ret) ? 0 : ret, true);
    getDataViewMemory0().setInt32(arg0 + 4 * 0, !isLikeNone(ret), true);
};

// If `arg1` is a string, copies it into wasm memory and writes the
// (ptr, len) pair at `arg0`; a non-string is signalled with ptr = 0.
exports.__wbg_wbindgenstringget_0f16a6ddddef376f = function(arg0, arg1) {
    const obj = arg1;
    const ret = typeof(obj) === 'string' ? obj : undefined;
    var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
    var len1 = WASM_VECTOR_LEN;
    getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true);
    getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true);
};
|
|
774
|
+
|
|
775
|
+
// Throws a JS Error whose message is read from wasm memory at (arg0, arg1).
exports.__wbg_wbindgenthrow_451ec1a8469d7eb6 = function(arg0, arg1) {
    throw new Error(getStringFromWasm0(arg0, arg1));
};

exports.__wbindgen_cast_2241b6af4c4b2941 = function(arg0, arg1) {
    // Cast intrinsic for `Ref(String) -> Externref`.
    // Decodes the wasm-side string into a JS string value.
    const ret = getStringFromWasm0(arg0, arg1);
    return ret;
};

exports.__wbindgen_cast_4625c577ab2ec9ee = function(arg0) {
    // Cast intrinsic for `U64 -> Externref`.
    // Reinterprets the (signed) wasm i64 as an unsigned 64-bit BigInt.
    const ret = BigInt.asUintN(64, arg0);
    return ret;
};

exports.__wbindgen_cast_9ae0607507abb057 = function(arg0) {
    // Cast intrinsic for `I64 -> Externref`.
    const ret = arg0;
    return ret;
};

exports.__wbindgen_cast_d6cd19b81560fd6e = function(arg0) {
    // Cast intrinsic for `F64 -> Externref`.
    const ret = arg0;
    return ret;
};
|
|
802
|
+
|
|
803
|
+
// Seeds the shared externref table with the sentinel JS values the wasm
// module addresses by fixed index: slot 0 and offset+0 hold `undefined`,
// followed by null, true, and false. Invoked once from the wasm side at
// startup; the indices must not be rearranged.
exports.__wbindgen_init_externref_table = function() {
    const table = wasm.__wbindgen_export_4;
    const offset = table.grow(4);
    table.set(0, undefined);
    table.set(offset + 0, undefined);
    table.set(offset + 1, null);
    table.set(offset + 2, true);
    table.set(offset + 3, false);
    ;
};
|
|
813
|
+
|
|
814
|
+
// Synchronous module bootstrap (Node.js target): read the .wasm binary that
// ships next to this file, compile and instantiate it with the glue import
// object (`imports`, defined earlier in this file), expose the instance's
// exports as `wasm`, then run the wasm-side start function.
const wasmPath = `${__dirname}/lindera_wasm_bg.wasm`;
const wasmBytes = require('fs').readFileSync(wasmPath);
const wasmModule = new WebAssembly.Module(wasmBytes);
const wasm = exports.__wasm = new WebAssembly.Instance(wasmModule, imports).exports;

wasm.__wbindgen_start();
|
|
820
|
+
|
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "lindera-wasm-nodejs-cc-cedict",
|
|
3
|
+
"description": "Lindera WASM with Chinese dictionary (CC-CEDICT) (nodejs target)",
|
|
4
|
+
"version": "1.2.1",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "https://github.com/lindera/lindera-wasm"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"lindera_wasm_bg.wasm",
|
|
12
|
+
"lindera_wasm.js",
|
|
13
|
+
"lindera_wasm.d.ts"
|
|
14
|
+
],
|
|
15
|
+
"main": "lindera_wasm.js",
|
|
16
|
+
"homepage": "https://github.com/lindera/lindera-wasm",
|
|
17
|
+
"types": "lindera_wasm.d.ts",
|
|
18
|
+
"keywords": [
|
|
19
|
+
"morphological",
|
|
20
|
+
"analysis",
|
|
21
|
+
"library",
|
|
22
|
+
"wasm",
|
|
23
|
+
"webassembly"
|
|
24
|
+
]
|
|
25
|
+
}
|