sudachi-ts 0.1.13 → 0.1.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -105,7 +105,7 @@ const config = await loadConfig('./sudachi.json');
105
105
  const dict = Dictionary.create();
106
106
  ```
107
107
 
108
- Example `sudachi.json`:
108
+ Example `sudachi.json`:
109
109
 
110
110
  ```json
111
111
  {
@@ -120,10 +120,14 @@ Example `sudachi.json`:
120
120
  }
121
121
  }
122
122
  ]
123
- }
124
- ```
125
-
126
- By default, Sudachi-TS enables a built-in compound-particle lexicon
123
+ }
124
+ ```
125
+
126
+ For non-absolute file references in config (dictionary files, plugin module paths,
127
+ and built-in plugin file settings), Sudachi-TS tries paths relative to the config
128
+ file first, then relative to the current working directory.
129
+
130
+ By default, Sudachi-TS enables a built-in compound-particle lexicon
127
131
  (`"enableDefaultCompoundParticles": true`) so forms such as `かも`, `のか`,
128
132
  and `だから` are tokenized as single morphemes. Set it to `false` to disable:
129
133
 
@@ -269,7 +273,7 @@ See [PLUGINS.md](./PLUGINS.md) for detailed plugin development guide.
269
273
  Quick local comparison for the PoC token chunker plugin:
270
274
 
271
275
  ```bash
272
- bun examples/token-chunker-plugin.ts /path/to/system.dic "東京大学"
276
+ npm exec tsx examples/token-chunker-plugin.ts /path/to/system.dic "東京大学"
273
277
  ```
274
278
 
275
279
  This example prints each token as `surface/reading` so the chunking impact on
@@ -328,23 +332,23 @@ See [CONFIG.md](./CONFIG.md) for detailed configuration options.
328
332
 
329
333
  ## Development
330
334
 
331
- ```bash
332
- # Clone repository
333
- git clone https://github.com/your-org/sudachi-ts.git
334
- cd sudachi-ts
335
-
336
- # Install dependencies
337
- bun install
338
-
339
- # Type check
340
- bun x tsc --noEmit
341
-
342
- # Run tests
343
- bun test
344
-
345
- # Lint
346
- bun x @biomejs/biome lint --write .
347
- ```
335
+ ```bash
336
+ # Clone repository
337
+ git clone https://github.com/your-org/sudachi-ts.git
338
+ cd sudachi-ts
339
+
340
+ # Install dependencies
341
+ npm install
342
+
343
+ # Type check
344
+ npm run typecheck
345
+
346
+ # Run tests
347
+ npm test
348
+
349
+ # Lint
350
+ npm run check:fix
351
+ ```
348
352
 
349
353
  ## Architecture
350
354
 
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -15,8 +15,8 @@ export class Config {
15
15
  static async fromFile(filePath) {
16
16
  const content = await readFile(filePath, 'utf-8');
17
17
  const baseDir = dirname(filePath);
18
- const anchor = PathAnchor.filesystem(baseDir);
19
- return new Config(Settings.parse(content), anchor);
18
+ const anchor = PathAnchor.filesystem(baseDir).andThen(PathAnchor.none());
19
+ return new Config(Settings.parse(content, anchor), anchor);
20
20
  }
21
21
  static parse(json) {
22
22
  return new Config(Settings.parse(json));
@@ -61,7 +61,14 @@ export class Config {
61
61
  return this.settings.getIntList(key);
62
62
  }
63
63
  getPlugins(key) {
64
- return this.settings.getPlugins(key);
64
+ const plugins = this.settings.getPlugins(key);
65
+ if (!plugins) {
66
+ return null;
67
+ }
68
+ return plugins.map((plugin) => ({
69
+ className: plugin.className,
70
+ settings: plugin.settings.withAnchor(this.anchor),
71
+ }));
65
72
  }
66
73
  }
67
74
  export async function loadConfig(configPath) {
@@ -1,12 +1,18 @@
1
+ import { PathAnchor } from './pathAnchor.js';
1
2
  export type PluginConf<_T> = {
2
3
  className: string;
3
4
  settings: Settings;
4
5
  };
5
6
  export declare class Settings {
6
7
  private readonly data;
7
- constructor(data?: Record<string, unknown>);
8
+ private readonly anchor;
9
+ constructor(data?: Record<string, unknown>, anchor?: PathAnchor);
8
10
  static empty(): Settings;
9
- static parse(json: string, _basePath?: string): Settings;
11
+ static parse(json: string, basePathOrAnchor?: string | PathAnchor): Settings;
12
+ getAnchor(): PathAnchor;
13
+ withAnchor(anchor: PathAnchor): Settings;
14
+ getPath(key: string, defaultValue?: string): Promise<string | null>;
15
+ toObject(): Record<string, unknown>;
10
16
  getString(key: string, defaultValue?: string): string | null;
11
17
  getInt(key: string, defaultValue?: number): number;
12
18
  getBoolean(key: string, defaultValue: boolean): boolean;
@@ -1,17 +1,39 @@
1
+ import { PathAnchor } from './pathAnchor.js';
1
2
  export class Settings {
2
3
  data;
3
- constructor(data = {}) {
4
+ anchor;
5
+ constructor(data = {}, anchor = PathAnchor.none()) {
4
6
  this.data = { ...data };
7
+ this.anchor = anchor;
5
8
  }
6
9
  static empty() {
7
- return new Settings({});
10
+ return new Settings({}, PathAnchor.none());
8
11
  }
9
- static parse(json, _basePath) {
12
+ static parse(json, basePathOrAnchor) {
10
13
  const data = JSON.parse(json);
11
14
  if (typeof data !== 'object' || data === null) {
12
15
  throw new Error('root must be an object');
13
16
  }
14
- return new Settings(data);
17
+ if (typeof basePathOrAnchor === 'string') {
18
+ return new Settings(data, PathAnchor.filesystem(basePathOrAnchor).andThen(PathAnchor.none()));
19
+ }
20
+ return new Settings(data, basePathOrAnchor ?? PathAnchor.none());
21
+ }
22
+ getAnchor() {
23
+ return this.anchor;
24
+ }
25
+ withAnchor(anchor) {
26
+ return new Settings(this.data, anchor);
27
+ }
28
+ async getPath(key, defaultValue) {
29
+ const value = this.getString(key, defaultValue);
30
+ if (value === null) {
31
+ return null;
32
+ }
33
+ return await this.anchor.resolve(value);
34
+ }
35
+ toObject() {
36
+ return { ...this.data };
15
37
  }
16
38
  getString(key, defaultValue) {
17
39
  const value = this.data[key];
@@ -70,7 +92,7 @@ export class Settings {
70
92
  const obj = item;
71
93
  return {
72
94
  className: obj.class,
73
- settings: new Settings({ ...obj }),
95
+ settings: new Settings({ ...obj }, this.anchor),
74
96
  };
75
97
  }
76
98
  throw new Error(`sub-object for ${key} didn't have class key`);
@@ -79,9 +101,9 @@ export class Settings {
79
101
  return null;
80
102
  }
81
103
  withFallback(other) {
82
- return new Settings({ ...other.data, ...this.data });
104
+ return new Settings({ ...other.data, ...this.data }, this.anchor.andThen(other.anchor));
83
105
  }
84
106
  merge(overrides) {
85
- return new Settings({ ...this.data, ...overrides });
107
+ return new Settings({ ...this.data, ...overrides }, this.anchor);
86
108
  }
87
109
  }
@@ -1,3 +1,4 @@
1
+ import { readFile } from 'node:fs/promises';
1
2
  import { CategoryType } from './categoryType.js';
2
3
  export class CharacterCategory {
3
4
  static PATTERN_SPACES = /\s+/;
@@ -67,8 +68,7 @@ export class CharacterCategory {
67
68
  static async loadDefault() {
68
69
  const charCategory = new CharacterCategory();
69
70
  try {
70
- const response = await fetch(new URL('../resources/char.def', import.meta.url));
71
- const content = await response.text();
71
+ const content = await readFile(new URL('../resources/char.def', import.meta.url), 'utf-8');
72
72
  charCategory.readCharacterDefinition(content);
73
73
  }
74
74
  catch (e) {
@@ -44,8 +44,8 @@ export class DictionaryFactory {
44
44
  ensureLexiconSet().add(userDict.getLexicon());
45
45
  }
46
46
  }
47
- const loader = new PluginLoader();
48
- const defaultConfig = Config.parse(DEFAULT_CONFIG_JSON);
47
+ const loader = new PluginLoader(anchor);
48
+ const defaultConfig = Config.parse(DEFAULT_CONFIG_JSON).setAnchor(anchor);
49
49
  let inputTextPluginConfs = config.getPlugins('inputTextPlugin');
50
50
  if (!inputTextPluginConfs || inputTextPluginConfs.length === 0) {
51
51
  inputTextPluginConfs = defaultConfig.getPlugins('inputTextPlugin');
@@ -9,7 +9,7 @@ export class DefaultInputTextPlugin extends InputTextPlugin {
9
9
  if (this.initialized) {
10
10
  return;
11
11
  }
12
- const rewriteDefPath = this.settings.getString('rewriteDef');
12
+ const rewriteDefPath = await this.settings.getPath('rewriteDef');
13
13
  if (rewriteDefPath) {
14
14
  const content = await readFully(rewriteDefPath);
15
15
  this.readRewriteLists(content);
@@ -1,3 +1,4 @@
1
+ import { PathAnchor } from '../config/pathAnchor.js';
1
2
  import type { Settings } from '../config/settings.js';
2
3
  import type { Grammar } from '../dictionary/grammar.js';
3
4
  import type { Plugin } from './base.js';
@@ -11,6 +12,8 @@ export interface LoadedPlugin<T extends Plugin> {
11
12
  className: string;
12
13
  }
13
14
  export declare class PluginLoader {
15
+ private readonly anchor;
16
+ constructor(anchor?: PathAnchor);
14
17
  loadInputTextPlugin(className: string, settings: Settings): Promise<LoadedPlugin<InputTextPlugin>>;
15
18
  loadOovProviderPlugin(className: string, settings: Settings): Promise<LoadedPlugin<OovProviderPlugin>>;
16
19
  loadPathRewritePlugin(className: string, settings: Settings): Promise<LoadedPlugin<PathRewritePlugin>>;
@@ -35,6 +38,8 @@ export declare class PluginLoader {
35
38
  private loadPlugin;
36
39
  private findPluginClass;
37
40
  private isPluginConstructor;
41
+ private resolveClassSpecifier;
42
+ private isPathLikeSpecifier;
38
43
  private isBuiltIn;
39
44
  private getBuiltIn;
40
45
  }
@@ -1,3 +1,6 @@
1
+ import { isAbsolute, resolve } from 'node:path';
2
+ import { pathToFileURL } from 'node:url';
3
+ import { PathAnchor } from '../config/pathAnchor.js';
1
4
  import { InhibitConnectionPlugin } from './connection/inhibitConnectionPlugin.js';
2
5
  import { DefaultInputTextPlugin } from './inputText/defaultInputTextPlugin.js';
3
6
  import { IgnoreYomiganaPlugin } from './inputText/ignoreYomiganaPlugin.js';
@@ -9,6 +12,10 @@ import { JoinKatakanaOovPlugin } from './pathRewrite/joinKatakanaOovPlugin.js';
9
12
  import { JoinNumericPlugin } from './pathRewrite/joinNumericPlugin.js';
10
13
  import { TokenChunkerPlugin } from './pathRewrite/tokenChunkerPlugin.js';
11
14
  export class PluginLoader {
15
+ anchor;
16
+ constructor(anchor = PathAnchor.none()) {
17
+ this.anchor = anchor;
18
+ }
12
19
  async loadInputTextPlugin(className, settings) {
13
20
  const plugin = await this.loadPlugin(className, settings);
14
21
  return { plugin, className };
@@ -72,7 +79,8 @@ export class PluginLoader {
72
79
  PluginClass = this.getBuiltIn(className);
73
80
  }
74
81
  else {
75
- const module = await import(className);
82
+ const classSpecifier = await this.resolveClassSpecifier(className);
83
+ const module = await import(classSpecifier);
76
84
  PluginClass = this.findPluginClass(module, className);
77
85
  }
78
86
  const plugin = new PluginClass();
@@ -112,6 +120,24 @@ export class PluginLoader {
112
120
  return false;
113
121
  }
114
122
  }
123
+ async resolveClassSpecifier(className) {
124
+ if (this.anchor === PathAnchor.none() ||
125
+ !this.isPathLikeSpecifier(className)) {
126
+ return className;
127
+ }
128
+ const resolvedPath = await this.anchor.resolve(className);
129
+ const absolutePath = isAbsolute(resolvedPath)
130
+ ? resolvedPath
131
+ : resolve(resolvedPath);
132
+ return pathToFileURL(absolutePath).href;
133
+ }
134
+ isPathLikeSpecifier(className) {
135
+ return (className.startsWith('./') ||
136
+ className.startsWith('../') ||
137
+ className.startsWith('.\\') ||
138
+ className.startsWith('..\\') ||
139
+ isAbsolute(className));
140
+ }
115
141
  isBuiltIn(name) {
116
142
  return (name in BUILT_IN_PLUGINS || name.split('.').pop() in BUILT_IN_PLUGINS);
117
143
  }
@@ -11,12 +11,12 @@ export class MeCabOovProviderPlugin extends OovProviderPlugin {
11
11
  if (this.initialized) {
12
12
  return;
13
13
  }
14
- const charDefPath = this.settings.getString('charDef');
14
+ const charDefPath = await this.settings.getPath('charDef');
15
15
  if (charDefPath) {
16
16
  const content = await readFully(charDefPath);
17
17
  this.readCharacterProperty(content);
18
18
  }
19
- const unkDefPath = this.settings.getString('unkDef');
19
+ const unkDefPath = await this.settings.getPath('unkDef');
20
20
  const userPosMode = this.settings.getString(OovProviderPlugin.USER_POS, OovProviderPlugin.USER_POS_FORBID) ?? OovProviderPlugin.USER_POS_FORBID;
21
21
  if (unkDefPath) {
22
22
  const content = await readFully(unkDefPath);
package/package.json CHANGED
@@ -1,65 +1,69 @@
1
- {
2
- "name": "sudachi-ts",
3
- "version": "0.1.13",
4
- "description": "TypeScript port of Sudachi morphological analyzer for Japanese text",
5
- "keywords": [
6
- "morphological-analyzer",
7
- "nlp",
8
- "japanese",
9
- "tokenization",
10
- "natural-language-processing",
11
- "text-processing"
12
- ],
13
- "author": "Glen Stampoultzis",
14
- "license": "Apache-2.0",
15
- "repository": {
16
- "type": "git",
17
- "url": "https://github.com/gstamp/sudachi-ts.git",
18
- "directory": "sudachi-ts"
19
- },
20
- "bugs": "https://github.com/gstamp/sudachi-ts/issues",
21
- "homepage": "https://github.com/gstamp/sudachi-ts#readme",
22
- "type": "module",
23
- "main": "./build/src/index.js",
24
- "types": "./build/src/index.d.ts",
25
- "exports": {
26
- ".": "./build/src/index.js",
27
- "./dictionary": "./build/src/dictionary/index.js",
28
- "./config": "./build/src/config/index.js",
29
- "./plugins": "./build/src/plugins/index.js"
30
- },
31
- "bin": {
32
- "sudachi": "./build/bin/sudachi.js",
33
- "sudachi-build-system": "./build/bin/sudachi-build-system.js",
34
- "sudachi-build-user": "./build/bin/sudachi-build-user.js",
35
- "sudachi-print-dict": "./build/bin/sudachi-print-dict.js",
36
- "sudachi-print-header": "./build/bin/sudachi-print-header.js"
37
- },
38
- "files": [
39
- "build/",
40
- "README.md",
41
- "LICENSE"
42
- ],
43
- "engines": {
44
- "node": ">=18.0.0"
45
- },
1
+ {
2
+ "name": "sudachi-ts",
3
+ "version": "0.1.15",
4
+ "description": "TypeScript port of Sudachi morphological analyzer for Japanese text",
5
+ "keywords": [
6
+ "morphological-analyzer",
7
+ "nlp",
8
+ "japanese",
9
+ "tokenization",
10
+ "natural-language-processing",
11
+ "text-processing"
12
+ ],
13
+ "author": "Glen Stampoultzis",
14
+ "license": "Apache-2.0",
15
+ "repository": {
16
+ "type": "git",
17
+ "url": "https://github.com/gstamp/sudachi-ts.git",
18
+ "directory": "sudachi-ts"
19
+ },
20
+ "bugs": "https://github.com/gstamp/sudachi-ts/issues",
21
+ "homepage": "https://github.com/gstamp/sudachi-ts#readme",
22
+ "type": "module",
23
+ "main": "./build/src/index.js",
24
+ "types": "./build/src/index.d.ts",
25
+ "exports": {
26
+ ".": "./build/src/index.js",
27
+ "./dictionary": "./build/src/dictionary/index.js",
28
+ "./config": "./build/src/config/index.js",
29
+ "./plugins": "./build/src/plugins/index.js"
30
+ },
31
+ "bin": {
32
+ "sudachi": "./build/bin/sudachi.js",
33
+ "sudachi-build-system": "./build/bin/sudachi-build-system.js",
34
+ "sudachi-build-user": "./build/bin/sudachi-build-user.js",
35
+ "sudachi-print-dict": "./build/bin/sudachi-print-dict.js",
36
+ "sudachi-print-header": "./build/bin/sudachi-print-header.js"
37
+ },
38
+ "files": [
39
+ "build/",
40
+ "README.md",
41
+ "LICENSE"
42
+ ],
43
+ "engines": {
44
+ "node": ">=18.0.0"
45
+ },
46
46
  "scripts": {
47
47
  "build": "tsc --project tsconfig.build.json",
48
- "build:clean": "rm -rf build && npm run build",
48
+ "build:clean": "node -e \"require('node:fs').rmSync('build', { recursive: true, force: true })\" && npm run build",
49
49
  "prepack": "npm run build:clean",
50
50
  "lint": "biome lint src/",
51
51
  "format": "biome format src/",
52
- "check": "biome check src/ && tsc --noEmit",
53
- "check:fix": "biome check --write src/ && tsc --noEmit",
52
+ "typecheck": "tsc --noEmit",
53
+ "check": "biome check src/ && npm run typecheck",
54
+ "check:fix": "biome check --write src/ && npm run typecheck",
55
+ "test": "vitest run",
56
+ "test:watch": "vitest",
54
57
  "release": "./scripts/release.sh"
55
58
  },
56
59
  "devDependencies": {
57
60
  "@biomejs/biome": "^2.3.14",
58
- "@types/bun": "^1.1.0",
59
61
  "@types/node": "^22.0.0",
60
- "typescript": "^5.7.0"
62
+ "tsx": "^4.20.6",
63
+ "typescript": "^5.7.0",
64
+ "vitest": "^3.2.4"
61
65
  },
62
- "peerDependencies": {
63
- "typescript": "^5.0.0"
64
- }
65
- }
66
+ "peerDependencies": {
67
+ "typescript": "^5.0.0"
68
+ }
69
+ }