sudachi-ts 0.1.13 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -23
- package/build/bin/sudachi-build-system.js +0 -0
- package/build/bin/sudachi-build-user.js +0 -0
- package/build/bin/sudachi-print-dict.js +0 -0
- package/build/bin/sudachi-print-header.js +0 -0
- package/build/bin/sudachi.js +0 -0
- package/build/src/config/config.js +10 -3
- package/build/src/config/settings.d.ts +8 -2
- package/build/src/config/settings.js +29 -7
- package/build/src/core/japaneseTokenizer.js +3 -0
- package/build/src/dictionary/characterCategory.js +2 -2
- package/build/src/dictionary/dictionaryFactory.js +2 -2
- package/build/src/plugins/inputText/defaultInputTextPlugin.js +1 -1
- package/build/src/plugins/loader.d.ts +5 -0
- package/build/src/plugins/loader.js +27 -1
- package/build/src/plugins/oov/meCabOovProviderPlugin.js +2 -2
- package/build/src/plugins/pathRewrite/base.d.ts +2 -0
- package/build/src/plugins/pathRewrite/base.js +1 -0
- package/build/src/plugins/pathRewrite/tokenChunkerPlugin.d.ts +2 -0
- package/build/src/plugins/pathRewrite/tokenChunkerPlugin.js +6 -0
- package/package.json +58 -54
package/README.md
CHANGED
|
@@ -105,7 +105,7 @@ const config = await loadConfig('./sudachi.json');
|
|
|
105
105
|
const dict = Dictionary.create();
|
|
106
106
|
```
|
|
107
107
|
|
|
108
|
-
Example `sudachi.json`:
|
|
108
|
+
Example `sudachi.json`:
|
|
109
109
|
|
|
110
110
|
```json
|
|
111
111
|
{
|
|
@@ -120,10 +120,14 @@ Example `sudachi.json`:
|
|
|
120
120
|
}
|
|
121
121
|
}
|
|
122
122
|
]
|
|
123
|
-
}
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
|
|
123
|
+
}
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
For non-absolute file references in config (dictionary files, plugin module paths,
|
|
127
|
+
and built-in plugin file settings), Sudachi-TS tries paths relative to the config
|
|
128
|
+
file first, then relative to the current working directory.
|
|
129
|
+
|
|
130
|
+
By default, Sudachi-TS enables a built-in compound-particle lexicon
|
|
127
131
|
(`"enableDefaultCompoundParticles": true`) so forms such as `かも`, `のか`,
|
|
128
132
|
and `だから` are tokenized as single morphemes. Set it to `false` to disable:
|
|
129
133
|
|
|
@@ -269,7 +273,7 @@ See [PLUGINS.md](./PLUGINS.md) for detailed plugin development guide.
|
|
|
269
273
|
Quick local comparison for the PoC token chunker plugin:
|
|
270
274
|
|
|
271
275
|
```bash
|
|
272
|
-
|
|
276
|
+
npm exec tsx examples/token-chunker-plugin.ts /path/to/system.dic "東京大学"
|
|
273
277
|
```
|
|
274
278
|
|
|
275
279
|
This example prints each token as `surface/reading` so the chunking impact on
|
|
@@ -277,6 +281,9 @@ readings is visible in the baseline vs plugin outputs.
|
|
|
277
281
|
`TokenChunkerPlugin` is designed and validated against the full Sudachi system
|
|
278
282
|
dictionary (`system_full.dic` / `system.dic`), so prefer full-dictionary checks
|
|
279
283
|
when adding or tuning chunk rules.
|
|
284
|
+
`TokenChunkerPlugin` is intended for `SplitMode.C` tokenization; calling
|
|
285
|
+
`tokenize(SplitMode.A, ...)` or `tokenize(SplitMode.B, ...)` with this plugin
|
|
286
|
+
enabled throws an error.
|
|
280
287
|
|
|
281
288
|
## Dictionary Building
|
|
282
289
|
|
|
@@ -328,23 +335,23 @@ See [CONFIG.md](./CONFIG.md) for detailed configuration options.
|
|
|
328
335
|
|
|
329
336
|
## Development
|
|
330
337
|
|
|
331
|
-
```bash
|
|
332
|
-
# Clone repository
|
|
333
|
-
git clone https://github.com/your-org/sudachi-ts.git
|
|
334
|
-
cd sudachi-ts
|
|
335
|
-
|
|
336
|
-
# Install dependencies
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
# Type check
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
# Run tests
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
# Lint
|
|
346
|
-
|
|
347
|
-
```
|
|
338
|
+
```bash
|
|
339
|
+
# Clone repository
|
|
340
|
+
git clone https://github.com/your-org/sudachi-ts.git
|
|
341
|
+
cd sudachi-ts
|
|
342
|
+
|
|
343
|
+
# Install dependencies
|
|
344
|
+
npm install
|
|
345
|
+
|
|
346
|
+
# Type check
|
|
347
|
+
npm run typecheck
|
|
348
|
+
|
|
349
|
+
# Run tests
|
|
350
|
+
npm test
|
|
351
|
+
|
|
352
|
+
# Lint
|
|
353
|
+
npm run check:fix
|
|
354
|
+
```
|
|
348
355
|
|
|
349
356
|
## Architecture
|
|
350
357
|
|
|
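package/build/bin/sudachi-build-system.js
CHANGED
|
File without changes
|
|
package/build/bin/sudachi-build-user.js
CHANGED
|
File without changes
|
|
package/build/bin/sudachi-print-dict.js
CHANGED
|
File without changes
|
|
package/build/bin/sudachi-print-header.js
CHANGED
|
File without changes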
|
package/build/bin/sudachi.js
CHANGED
|
File without changes
|
|
package/build/src/config/config.js
CHANGED
|
@@ -15,8 +15,8 @@ export class Config {
|
|
|
15
15
|
static async fromFile(filePath) {
|
|
16
16
|
const content = await readFile(filePath, 'utf-8');
|
|
17
17
|
const baseDir = dirname(filePath);
|
|
18
|
-
const anchor = PathAnchor.filesystem(baseDir);
|
|
19
|
-
return new Config(Settings.parse(content), anchor);
|
|
18
|
+
const anchor = PathAnchor.filesystem(baseDir).andThen(PathAnchor.none());
|
|
19
|
+
return new Config(Settings.parse(content, anchor), anchor);
|
|
20
20
|
}
|
|
21
21
|
static parse(json) {
|
|
22
22
|
return new Config(Settings.parse(json));
|
|
@@ -61,7 +61,14 @@ export class Config {
|
|
|
61
61
|
return this.settings.getIntList(key);
|
|
62
62
|
}
|
|
63
63
|
getPlugins(key) {
|
|
64
|
-
|
|
64
|
+
const plugins = this.settings.getPlugins(key);
|
|
65
|
+
if (!plugins) {
|
|
66
|
+
return null;
|
|
67
|
+
}
|
|
68
|
+
return plugins.map((plugin) => ({
|
|
69
|
+
className: plugin.className,
|
|
70
|
+
settings: plugin.settings.withAnchor(this.anchor),
|
|
71
|
+
}));
|
|
65
72
|
}
|
|
66
73
|
}
|
|
67
74
|
export async function loadConfig(configPath) {
|
|
package/build/src/config/settings.d.ts
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
|
+
import { PathAnchor } from './pathAnchor.js';
|
|
1
2
|
export type PluginConf<_T> = {
|
|
2
3
|
className: string;
|
|
3
4
|
settings: Settings;
|
|
4
5
|
};
|
|
5
6
|
export declare class Settings {
|
|
6
7
|
private readonly data;
|
|
7
|
-
|
|
8
|
+
private readonly anchor;
|
|
9
|
+
constructor(data?: Record<string, unknown>, anchor?: PathAnchor);
|
|
8
10
|
static empty(): Settings;
|
|
9
|
-
static parse(json: string,
|
|
11
|
+
static parse(json: string, basePathOrAnchor?: string | PathAnchor): Settings;
|
|
12
|
+
getAnchor(): PathAnchor;
|
|
13
|
+
withAnchor(anchor: PathAnchor): Settings;
|
|
14
|
+
getPath(key: string, defaultValue?: string): Promise<string | null>;
|
|
15
|
+
toObject(): Record<string, unknown>;
|
|
10
16
|
getString(key: string, defaultValue?: string): string | null;
|
|
11
17
|
getInt(key: string, defaultValue?: number): number;
|
|
12
18
|
getBoolean(key: string, defaultValue: boolean): boolean;
|
|
package/build/src/config/settings.js
CHANGED
|
@@ -1,17 +1,39 @@
|
|
|
1
|
+
import { PathAnchor } from './pathAnchor.js';
|
|
1
2
|
export class Settings {
|
|
2
3
|
data;
|
|
3
|
-
|
|
4
|
+
anchor;
|
|
5
|
+
constructor(data = {}, anchor = PathAnchor.none()) {
|
|
4
6
|
this.data = { ...data };
|
|
7
|
+
this.anchor = anchor;
|
|
5
8
|
}
|
|
6
9
|
static empty() {
|
|
7
|
-
return new Settings({});
|
|
10
|
+
return new Settings({}, PathAnchor.none());
|
|
8
11
|
}
|
|
9
|
-
static parse(json,
|
|
12
|
+
static parse(json, basePathOrAnchor) {
|
|
10
13
|
const data = JSON.parse(json);
|
|
11
14
|
if (typeof data !== 'object' || data === null) {
|
|
12
15
|
throw new Error('root must be an object');
|
|
13
16
|
}
|
|
14
|
-
|
|
17
|
+
if (typeof basePathOrAnchor === 'string') {
|
|
18
|
+
return new Settings(data, PathAnchor.filesystem(basePathOrAnchor).andThen(PathAnchor.none()));
|
|
19
|
+
}
|
|
20
|
+
return new Settings(data, basePathOrAnchor ?? PathAnchor.none());
|
|
21
|
+
}
|
|
22
|
+
getAnchor() {
|
|
23
|
+
return this.anchor;
|
|
24
|
+
}
|
|
25
|
+
withAnchor(anchor) {
|
|
26
|
+
return new Settings(this.data, anchor);
|
|
27
|
+
}
|
|
28
|
+
async getPath(key, defaultValue) {
|
|
29
|
+
const value = this.getString(key, defaultValue);
|
|
30
|
+
if (value === null) {
|
|
31
|
+
return null;
|
|
32
|
+
}
|
|
33
|
+
return await this.anchor.resolve(value);
|
|
34
|
+
}
|
|
35
|
+
toObject() {
|
|
36
|
+
return { ...this.data };
|
|
15
37
|
}
|
|
16
38
|
getString(key, defaultValue) {
|
|
17
39
|
const value = this.data[key];
|
|
@@ -70,7 +92,7 @@ export class Settings {
|
|
|
70
92
|
const obj = item;
|
|
71
93
|
return {
|
|
72
94
|
className: obj.class,
|
|
73
|
-
settings: new Settings({ ...obj }),
|
|
95
|
+
settings: new Settings({ ...obj }, this.anchor),
|
|
74
96
|
};
|
|
75
97
|
}
|
|
76
98
|
throw new Error(`sub-object for ${key} didn't have class key`);
|
|
@@ -79,9 +101,9 @@ export class Settings {
|
|
|
79
101
|
return null;
|
|
80
102
|
}
|
|
81
103
|
withFallback(other) {
|
|
82
|
-
return new Settings({ ...other.data, ...this.data });
|
|
104
|
+
return new Settings({ ...other.data, ...this.data }, this.anchor.andThen(other.anchor));
|
|
83
105
|
}
|
|
84
106
|
merge(overrides) {
|
|
85
|
-
return new Settings({ ...this.data, ...overrides });
|
|
107
|
+
return new Settings({ ...this.data, ...overrides }, this.anchor);
|
|
86
108
|
}
|
|
87
109
|
}
|
|
package/build/src/core/japaneseTokenizer.js
CHANGED
|
@@ -140,6 +140,9 @@ export class JapaneseTokenizer {
|
|
|
140
140
|
return builder.build();
|
|
141
141
|
}
|
|
142
142
|
tokenizeSentence(mode, input) {
|
|
143
|
+
for (const plugin of this.pathRewritePlugins) {
|
|
144
|
+
plugin.validateSplitMode(mode);
|
|
145
|
+
}
|
|
143
146
|
this.buildLattice(input);
|
|
144
147
|
const path = this.lattice.getBestPath();
|
|
145
148
|
for (const plugin of this.pathRewritePlugins) {
|
|
package/build/src/dictionary/characterCategory.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
|
1
2
|
import { CategoryType } from './categoryType.js';
|
|
2
3
|
export class CharacterCategory {
|
|
3
4
|
static PATTERN_SPACES = /\s+/;
|
|
@@ -67,8 +68,7 @@ export class CharacterCategory {
|
|
|
67
68
|
static async loadDefault() {
|
|
68
69
|
const charCategory = new CharacterCategory();
|
|
69
70
|
try {
|
|
70
|
-
const
|
|
71
|
-
const content = await response.text();
|
|
71
|
+
const content = await readFile(new URL('../resources/char.def', import.meta.url), 'utf-8');
|
|
72
72
|
charCategory.readCharacterDefinition(content);
|
|
73
73
|
}
|
|
74
74
|
catch (e) {
|
|
package/build/src/dictionary/dictionaryFactory.js
CHANGED
|
@@ -44,8 +44,8 @@ export class DictionaryFactory {
|
|
|
44
44
|
ensureLexiconSet().add(userDict.getLexicon());
|
|
45
45
|
}
|
|
46
46
|
}
|
|
47
|
-
const loader = new PluginLoader();
|
|
48
|
-
const defaultConfig = Config.parse(DEFAULT_CONFIG_JSON);
|
|
47
|
+
const loader = new PluginLoader(anchor);
|
|
48
|
+
const defaultConfig = Config.parse(DEFAULT_CONFIG_JSON).setAnchor(anchor);
|
|
49
49
|
let inputTextPluginConfs = config.getPlugins('inputTextPlugin');
|
|
50
50
|
if (!inputTextPluginConfs || inputTextPluginConfs.length === 0) {
|
|
51
51
|
inputTextPluginConfs = defaultConfig.getPlugins('inputTextPlugin');
|
|
package/build/src/plugins/inputText/defaultInputTextPlugin.js
CHANGED
|
@@ -9,7 +9,7 @@ export class DefaultInputTextPlugin extends InputTextPlugin {
|
|
|
9
9
|
if (this.initialized) {
|
|
10
10
|
return;
|
|
11
11
|
}
|
|
12
|
-
const rewriteDefPath = this.settings.
|
|
12
|
+
const rewriteDefPath = await this.settings.getPath('rewriteDef');
|
|
13
13
|
if (rewriteDefPath) {
|
|
14
14
|
const content = await readFully(rewriteDefPath);
|
|
15
15
|
this.readRewriteLists(content);
|
|
package/build/src/plugins/loader.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { PathAnchor } from '../config/pathAnchor.js';
|
|
1
2
|
import type { Settings } from '../config/settings.js';
|
|
2
3
|
import type { Grammar } from '../dictionary/grammar.js';
|
|
3
4
|
import type { Plugin } from './base.js';
|
|
@@ -11,6 +12,8 @@ export interface LoadedPlugin<T extends Plugin> {
|
|
|
11
12
|
className: string;
|
|
12
13
|
}
|
|
13
14
|
export declare class PluginLoader {
|
|
15
|
+
private readonly anchor;
|
|
16
|
+
constructor(anchor?: PathAnchor);
|
|
14
17
|
loadInputTextPlugin(className: string, settings: Settings): Promise<LoadedPlugin<InputTextPlugin>>;
|
|
15
18
|
loadOovProviderPlugin(className: string, settings: Settings): Promise<LoadedPlugin<OovProviderPlugin>>;
|
|
16
19
|
loadPathRewritePlugin(className: string, settings: Settings): Promise<LoadedPlugin<PathRewritePlugin>>;
|
|
@@ -35,6 +38,8 @@ export declare class PluginLoader {
|
|
|
35
38
|
private loadPlugin;
|
|
36
39
|
private findPluginClass;
|
|
37
40
|
private isPluginConstructor;
|
|
41
|
+
private resolveClassSpecifier;
|
|
42
|
+
private isPathLikeSpecifier;
|
|
38
43
|
private isBuiltIn;
|
|
39
44
|
private getBuiltIn;
|
|
40
45
|
}
|
|
package/build/src/plugins/loader.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import { isAbsolute, resolve } from 'node:path';
|
|
2
|
+
import { pathToFileURL } from 'node:url';
|
|
3
|
+
import { PathAnchor } from '../config/pathAnchor.js';
|
|
1
4
|
import { InhibitConnectionPlugin } from './connection/inhibitConnectionPlugin.js';
|
|
2
5
|
import { DefaultInputTextPlugin } from './inputText/defaultInputTextPlugin.js';
|
|
3
6
|
import { IgnoreYomiganaPlugin } from './inputText/ignoreYomiganaPlugin.js';
|
|
@@ -9,6 +12,10 @@ import { JoinKatakanaOovPlugin } from './pathRewrite/joinKatakanaOovPlugin.js';
|
|
|
9
12
|
import { JoinNumericPlugin } from './pathRewrite/joinNumericPlugin.js';
|
|
10
13
|
import { TokenChunkerPlugin } from './pathRewrite/tokenChunkerPlugin.js';
|
|
11
14
|
export class PluginLoader {
|
|
15
|
+
anchor;
|
|
16
|
+
constructor(anchor = PathAnchor.none()) {
|
|
17
|
+
this.anchor = anchor;
|
|
18
|
+
}
|
|
12
19
|
async loadInputTextPlugin(className, settings) {
|
|
13
20
|
const plugin = await this.loadPlugin(className, settings);
|
|
14
21
|
return { plugin, className };
|
|
@@ -72,7 +79,8 @@ export class PluginLoader {
|
|
|
72
79
|
PluginClass = this.getBuiltIn(className);
|
|
73
80
|
}
|
|
74
81
|
else {
|
|
75
|
-
const
|
|
82
|
+
const classSpecifier = await this.resolveClassSpecifier(className);
|
|
83
|
+
const module = await import(classSpecifier);
|
|
76
84
|
PluginClass = this.findPluginClass(module, className);
|
|
77
85
|
}
|
|
78
86
|
const plugin = new PluginClass();
|
|
@@ -112,6 +120,24 @@ export class PluginLoader {
|
|
|
112
120
|
return false;
|
|
113
121
|
}
|
|
114
122
|
}
|
|
123
|
+
async resolveClassSpecifier(className) {
|
|
124
|
+
if (this.anchor === PathAnchor.none() ||
|
|
125
|
+
!this.isPathLikeSpecifier(className)) {
|
|
126
|
+
return className;
|
|
127
|
+
}
|
|
128
|
+
const resolvedPath = await this.anchor.resolve(className);
|
|
129
|
+
const absolutePath = isAbsolute(resolvedPath)
|
|
130
|
+
? resolvedPath
|
|
131
|
+
: resolve(resolvedPath);
|
|
132
|
+
return pathToFileURL(absolutePath).href;
|
|
133
|
+
}
|
|
134
|
+
isPathLikeSpecifier(className) {
|
|
135
|
+
return (className.startsWith('./') ||
|
|
136
|
+
className.startsWith('../') ||
|
|
137
|
+
className.startsWith('.\\') ||
|
|
138
|
+
className.startsWith('..\\') ||
|
|
139
|
+
isAbsolute(className));
|
|
140
|
+
}
|
|
115
141
|
isBuiltIn(name) {
|
|
116
142
|
return (name in BUILT_IN_PLUGINS || name.split('.').pop() in BUILT_IN_PLUGINS);
|
|
117
143
|
}
|
|
package/build/src/plugins/oov/meCabOovProviderPlugin.js
CHANGED
|
@@ -11,12 +11,12 @@ export class MeCabOovProviderPlugin extends OovProviderPlugin {
|
|
|
11
11
|
if (this.initialized) {
|
|
12
12
|
return;
|
|
13
13
|
}
|
|
14
|
-
const charDefPath = this.settings.
|
|
14
|
+
const charDefPath = await this.settings.getPath('charDef');
|
|
15
15
|
if (charDefPath) {
|
|
16
16
|
const content = await readFully(charDefPath);
|
|
17
17
|
this.readCharacterProperty(content);
|
|
18
18
|
}
|
|
19
|
-
const unkDefPath = this.settings.
|
|
19
|
+
const unkDefPath = await this.settings.getPath('unkDef');
|
|
20
20
|
const userPosMode = this.settings.getString(OovProviderPlugin.USER_POS, OovProviderPlugin.USER_POS_FORBID) ?? OovProviderPlugin.USER_POS_FORBID;
|
|
21
21
|
if (unkDefPath) {
|
|
22
22
|
const content = await readFully(unkDefPath);
|
|
package/build/src/plugins/pathRewrite/base.d.ts
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import type { InputText } from '../../core/inputText.js';
|
|
2
2
|
import type { Lattice, LatticeNode } from '../../core/lattice.js';
|
|
3
|
+
import type { SplitMode } from '../../core/tokenizer.js';
|
|
3
4
|
import type { CategoryType } from '../../dictionary/categoryType.js';
|
|
4
5
|
import type { Grammar } from '../../dictionary/grammar.js';
|
|
5
6
|
import { Plugin } from '../base.js';
|
|
6
7
|
export declare abstract class PathRewritePlugin extends Plugin {
|
|
7
8
|
setUp(_grammar: Grammar): void;
|
|
9
|
+
validateSplitMode(_mode: SplitMode): void;
|
|
8
10
|
abstract rewrite(text: InputText, path: LatticeNode[], lattice: Lattice): void;
|
|
9
11
|
concatenate(path: LatticeNode[], begin: number, end: number, lattice: Lattice, normalizedForm?: string | null): LatticeNode;
|
|
10
12
|
concatenateOov(path: LatticeNode[], begin: number, end: number, posId: number, lattice: Lattice): LatticeNode;
|
|
package/build/src/plugins/pathRewrite/base.js
CHANGED
|
@@ -2,6 +2,7 @@ import { WordInfo } from '../../dictionary/wordInfo.js';
|
|
|
2
2
|
import { Plugin } from '../base.js';
|
|
3
3
|
export class PathRewritePlugin extends Plugin {
|
|
4
4
|
setUp(_grammar) { }
|
|
5
|
+
validateSplitMode(_mode) { }
|
|
5
6
|
concatenate(path, begin, end, lattice, normalizedForm = null) {
|
|
6
7
|
if (begin >= end) {
|
|
7
8
|
throw new Error('begin >= end');
|
|
package/build/src/plugins/pathRewrite/tokenChunkerPlugin.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { InputText } from '../../core/inputText.js';
|
|
2
2
|
import type { Lattice, LatticeNode } from '../../core/lattice.js';
|
|
3
|
+
import { SplitMode } from '../../core/tokenizer.js';
|
|
3
4
|
import type { Grammar } from '../../dictionary/grammar.js';
|
|
4
5
|
import { PathRewritePlugin } from './base.js';
|
|
5
6
|
export declare class TokenChunkerPlugin extends PathRewritePlugin {
|
|
@@ -7,6 +8,7 @@ export declare class TokenChunkerPlugin extends PathRewritePlugin {
|
|
|
7
8
|
private enablePatternRules;
|
|
8
9
|
private enableBroadRules;
|
|
9
10
|
setUp(grammar: Grammar): void;
|
|
11
|
+
validateSplitMode(mode: SplitMode): void;
|
|
10
12
|
rewrite(_text: InputText, path: LatticeNode[], lattice: Lattice): void;
|
|
11
13
|
private toInitialChunks;
|
|
12
14
|
private applyPatternStage;
|
|
package/build/src/plugins/pathRewrite/tokenChunkerPlugin.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { SplitMode } from '../../core/tokenizer.js';
|
|
1
2
|
import { WordInfo } from '../../dictionary/wordInfo.js';
|
|
2
3
|
import { PathRewritePlugin } from './base.js';
|
|
3
4
|
export class TokenChunkerPlugin extends PathRewritePlugin {
|
|
@@ -9,6 +10,11 @@ export class TokenChunkerPlugin extends PathRewritePlugin {
|
|
|
9
10
|
this.enablePatternRules = this.settings.getBoolean('enablePatternRules', true);
|
|
10
11
|
this.enableBroadRules = this.settings.getBoolean('enableBroadRules', false);
|
|
11
12
|
}
|
|
13
|
+
validateSplitMode(mode) {
|
|
14
|
+
if (mode !== SplitMode.C) {
|
|
15
|
+
throw new Error('TokenChunkerPlugin requires SplitMode.C. Use tokenizer.tokenize(text) or tokenizer.tokenize(SplitMode.C, text).');
|
|
16
|
+
}
|
|
17
|
+
}
|
|
12
18
|
rewrite(_text, path, lattice) {
|
|
13
19
|
if (path.length === 0) {
|
|
14
20
|
return;
|
package/package.json
CHANGED
|
@@ -1,65 +1,69 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "sudachi-ts",
|
|
3
|
-
"version": "0.1.13",
|
|
4
|
-
"description": "TypeScript port of Sudachi morphological analyzer for Japanese text",
|
|
5
|
-
"keywords": [
|
|
6
|
-
"morphological-analyzer",
|
|
7
|
-
"nlp",
|
|
8
|
-
"japanese",
|
|
9
|
-
"tokenization",
|
|
10
|
-
"natural-language-processing",
|
|
11
|
-
"text-processing"
|
|
12
|
-
],
|
|
13
|
-
"author": "Glen Stampoultzis",
|
|
14
|
-
"license": "Apache-2.0",
|
|
15
|
-
"repository": {
|
|
16
|
-
"type": "git",
|
|
17
|
-
"url": "https://github.com/gstamp/sudachi-ts.git",
|
|
18
|
-
"directory": "sudachi-ts"
|
|
19
|
-
},
|
|
20
|
-
"bugs": "https://github.com/gstamp/sudachi-ts/issues",
|
|
21
|
-
"homepage": "https://github.com/gstamp/sudachi-ts#readme",
|
|
22
|
-
"type": "module",
|
|
23
|
-
"main": "./build/src/index.js",
|
|
24
|
-
"types": "./build/src/index.d.ts",
|
|
25
|
-
"exports": {
|
|
26
|
-
".": "./build/src/index.js",
|
|
27
|
-
"./dictionary": "./build/src/dictionary/index.js",
|
|
28
|
-
"./config": "./build/src/config/index.js",
|
|
29
|
-
"./plugins": "./build/src/plugins/index.js"
|
|
30
|
-
},
|
|
31
|
-
"bin": {
|
|
32
|
-
"sudachi": "./build/bin/sudachi.js",
|
|
33
|
-
"sudachi-build-system": "./build/bin/sudachi-build-system.js",
|
|
34
|
-
"sudachi-build-user": "./build/bin/sudachi-build-user.js",
|
|
35
|
-
"sudachi-print-dict": "./build/bin/sudachi-print-dict.js",
|
|
36
|
-
"sudachi-print-header": "./build/bin/sudachi-print-header.js"
|
|
37
|
-
},
|
|
38
|
-
"files": [
|
|
39
|
-
"build/",
|
|
40
|
-
"README.md",
|
|
41
|
-
"LICENSE"
|
|
42
|
-
],
|
|
43
|
-
"engines": {
|
|
44
|
-
"node": ">=18.0.0"
|
|
45
|
-
},
|
|
1
|
+
{
|
|
2
|
+
"name": "sudachi-ts",
|
|
3
|
+
"version": "0.1.16",
|
|
4
|
+
"description": "TypeScript port of Sudachi morphological analyzer for Japanese text",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"morphological-analyzer",
|
|
7
|
+
"nlp",
|
|
8
|
+
"japanese",
|
|
9
|
+
"tokenization",
|
|
10
|
+
"natural-language-processing",
|
|
11
|
+
"text-processing"
|
|
12
|
+
],
|
|
13
|
+
"author": "Glen Stampoultzis",
|
|
14
|
+
"license": "Apache-2.0",
|
|
15
|
+
"repository": {
|
|
16
|
+
"type": "git",
|
|
17
|
+
"url": "https://github.com/gstamp/sudachi-ts.git",
|
|
18
|
+
"directory": "sudachi-ts"
|
|
19
|
+
},
|
|
20
|
+
"bugs": "https://github.com/gstamp/sudachi-ts/issues",
|
|
21
|
+
"homepage": "https://github.com/gstamp/sudachi-ts#readme",
|
|
22
|
+
"type": "module",
|
|
23
|
+
"main": "./build/src/index.js",
|
|
24
|
+
"types": "./build/src/index.d.ts",
|
|
25
|
+
"exports": {
|
|
26
|
+
".": "./build/src/index.js",
|
|
27
|
+
"./dictionary": "./build/src/dictionary/index.js",
|
|
28
|
+
"./config": "./build/src/config/index.js",
|
|
29
|
+
"./plugins": "./build/src/plugins/index.js"
|
|
30
|
+
},
|
|
31
|
+
"bin": {
|
|
32
|
+
"sudachi": "./build/bin/sudachi.js",
|
|
33
|
+
"sudachi-build-system": "./build/bin/sudachi-build-system.js",
|
|
34
|
+
"sudachi-build-user": "./build/bin/sudachi-build-user.js",
|
|
35
|
+
"sudachi-print-dict": "./build/bin/sudachi-print-dict.js",
|
|
36
|
+
"sudachi-print-header": "./build/bin/sudachi-print-header.js"
|
|
37
|
+
},
|
|
38
|
+
"files": [
|
|
39
|
+
"build/",
|
|
40
|
+
"README.md",
|
|
41
|
+
"LICENSE"
|
|
42
|
+
],
|
|
43
|
+
"engines": {
|
|
44
|
+
"node": ">=18.0.0"
|
|
45
|
+
},
|
|
46
46
|
"scripts": {
|
|
47
47
|
"build": "tsc --project tsconfig.build.json",
|
|
48
|
-
"build:clean": "
|
|
48
|
+
"build:clean": "node -e \"require('node:fs').rmSync('build', { recursive: true, force: true })\" && npm run build",
|
|
49
49
|
"prepack": "npm run build:clean",
|
|
50
50
|
"lint": "biome lint src/",
|
|
51
51
|
"format": "biome format src/",
|
|
52
|
-
"
|
|
53
|
-
"check
|
|
52
|
+
"typecheck": "tsc --noEmit",
|
|
53
|
+
"check": "biome check src/ && npm run typecheck",
|
|
54
|
+
"check:fix": "biome check --write src/ && npm run typecheck",
|
|
55
|
+
"test": "vitest run",
|
|
56
|
+
"test:watch": "vitest",
|
|
54
57
|
"release": "./scripts/release.sh"
|
|
55
58
|
},
|
|
56
59
|
"devDependencies": {
|
|
57
60
|
"@biomejs/biome": "^2.3.14",
|
|
58
|
-
"@types/bun": "^1.1.0",
|
|
59
61
|
"@types/node": "^22.0.0",
|
|
60
|
-
"
|
|
62
|
+
"tsx": "^4.20.6",
|
|
63
|
+
"typescript": "^5.7.0",
|
|
64
|
+
"vitest": "^3.2.4"
|
|
61
65
|
},
|
|
62
|
-
"peerDependencies": {
|
|
63
|
-
"typescript": "^5.0.0"
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
+
"peerDependencies": {
|
|
67
|
+
"typescript": "^5.0.0"
|
|
68
|
+
}
|
|
69
|
+
}
|