sudachi-ts 0.1.21 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,6 +46,15 @@ export class DictionaryFactory {
46
46
  }
47
47
  const loader = new PluginLoader(anchor);
48
48
  const defaultConfig = Config.parse(DEFAULT_CONFIG_JSON).setAnchor(anchor);
49
+ let editConnectionPluginConfs = config.getPlugins('editConnectionCostPlugin');
50
+ if (!editConnectionPluginConfs || editConnectionPluginConfs.length === 0) {
51
+ editConnectionPluginConfs = defaultConfig.getPlugins('editConnectionCostPlugin');
52
+ }
53
+ if (editConnectionPluginConfs && editConnectionPluginConfs.length > 0) {
54
+ for (const loaded of await loader.loadEditConnectionCostPlugins(editConnectionPluginConfs, grammar, lexicon)) {
55
+ loaded.plugin.edit(grammar);
56
+ }
57
+ }
49
58
  let inputTextPluginConfs = config.getPlugins('inputTextPlugin');
50
59
  if (!inputTextPluginConfs || inputTextPluginConfs.length === 0) {
51
60
  inputTextPluginConfs = defaultConfig.getPlugins('inputTextPlugin');
@@ -24,7 +24,7 @@ export { DEPTH, MAX_COMPONENT_LENGTH, POS } from './dictionary/pos.js';
24
24
  export { PartialPOS, PosMatcher } from './dictionary/posMatcher.js';
25
25
  export { WordInfo } from './dictionary/wordInfo.js';
26
26
  export { DoubleArray } from './dictionary-build/doubleArray.js';
27
- export { EditConnectionCostPlugin, InputTextPlugin, type LoadedPlugin, MorphemeFormatterPlugin, OovProviderPlugin, PathRewritePlugin, Plugin, PluginLoader, } from './plugins/index.js';
27
+ export { EditConnectionCostPlugin, InputTextPlugin, type LoadedPlugin, MorphemeFormatterPlugin, OovProviderPlugin, PathRewritePlugin, Plugin, PluginLoader, TargetedConnectionCostPlugin, } from './plugins/index.js';
28
28
  export type { NonBreakChecker } from './sentdetect/sentenceDetector.js';
29
29
  export { DEFAULT_LIMIT, SentenceDetector, } from './sentdetect/sentenceDetector.js';
30
30
  export { applyMask, dic, dicIdMask, MAX_DIC_ID, MAX_WORD_ID, make, word, } from './utils/wordId.js';
@@ -15,7 +15,7 @@ export { DEPTH, MAX_COMPONENT_LENGTH, POS } from './dictionary/pos.js';
15
15
  export { PartialPOS, PosMatcher } from './dictionary/posMatcher.js';
16
16
  export { WordInfo } from './dictionary/wordInfo.js';
17
17
  export { DoubleArray } from './dictionary-build/doubleArray.js';
18
- export { EditConnectionCostPlugin, InputTextPlugin, MorphemeFormatterPlugin, OovProviderPlugin, PathRewritePlugin, Plugin, PluginLoader, } from './plugins/index.js';
18
+ export { EditConnectionCostPlugin, InputTextPlugin, MorphemeFormatterPlugin, OovProviderPlugin, PathRewritePlugin, Plugin, PluginLoader, TargetedConnectionCostPlugin, } from './plugins/index.js';
19
19
  export { DEFAULT_LIMIT, SentenceDetector, } from './sentdetect/sentenceDetector.js';
20
20
  export { applyMask, dic, dicIdMask, MAX_DIC_ID, MAX_WORD_ID, make, word, } from './utils/wordId.js';
21
21
  export { addNth, hasNth, MAX_LENGTH, nth } from './utils/wordMask.js';
@@ -1,4 +1,4 @@
1
- import type { Settings } from '../config/settings.js';
1
+ import { Settings } from '../config/settings.js';
2
2
  export declare abstract class Plugin {
3
3
  protected settings: Settings;
4
4
  constructor();
@@ -1,7 +1,8 @@
1
+ import { Settings } from '../config/settings.js';
1
2
  export class Plugin {
2
3
  settings;
3
4
  constructor() {
4
- this.settings = { getString: () => null };
5
+ this.settings = Settings.empty();
5
6
  }
6
7
  setSettings(settings) {
7
8
  this.settings = settings;
@@ -1,7 +1,8 @@
1
1
  import type { Grammar } from '../../dictionary/grammar.js';
2
+ import type { Lexicon } from '../../dictionary/lexicon.js';
2
3
  import { Plugin } from '../base.js';
3
4
  export declare abstract class EditConnectionCostPlugin extends Plugin {
4
- setUp(_grammar: Grammar): void;
5
+ setUp(_grammar: Grammar, _lexicon?: Lexicon): void;
5
6
  abstract edit(grammar: Grammar): void;
6
7
  inhibitConnection(grammar: Grammar, left: number, right: number): void;
7
8
  }
@@ -1,6 +1,6 @@
1
1
  import { Plugin } from '../base.js';
2
2
  export class EditConnectionCostPlugin extends Plugin {
3
- setUp(_grammar) { }
3
+ setUp(_grammar, _lexicon) { }
4
4
  inhibitConnection(grammar, left, right) {
5
5
  grammar.setConnectCost(left, right, grammar.INHIBITED_CONNECTION);
6
6
  }
@@ -0,0 +1,13 @@
1
+ import type { Grammar } from '../../dictionary/grammar.js';
2
+ import type { Lexicon } from '../../dictionary/lexicon.js';
3
+ import { EditConnectionCostPlugin } from './base.js';
4
/**
 * Connection-cost plugin that overrides the cost between specific pairs of
 * lexicon entries. Each configured rule names a left and a right entry (by
 * surface, part of speech and reading) and the cost to apply between them.
 */
+ export declare class TargetedConnectionCostPlugin extends EditConnectionCostPlugin {
5
/** Rules resolved by `setUp` and later applied by `edit`. */
+ private rules;
6
/**
 * Reads the `rules` array from the plugin settings and resolves every rule
 * against the given grammar and lexicon.
 *
 * @throws Error when `lexicon` is omitted, when `rules` is not a non-empty
 *   array, or when any rule is malformed or names an unknown POS or entry.
 */
+ setUp(grammar: Grammar, lexicon?: Lexicon): void;
7
/** Calls `grammar.setConnectCost` once for every rule resolved by `setUp`. */
+ edit(grammar: Grammar): void;
8
/** Validates one raw rule and resolves it to a (right ID of left entry, left ID of right entry, cost) triple. */
+ private resolveRule;
9
/** Looks up the word ID of one entry target; throws when its POS or the entry itself is not found. */
+ private resolveWordId;
10
/** Checks that a POS list contains only strings, pads it with '*' up to six items and truncates it to six. */
+ private normalizePos;
11
/** Checks that a raw rule is an object with a numeric `cost` and valid `left` / `right` targets. */
+ private requireConnectionRule;
12
/** Checks that a raw target is an object with a string `surface`, an array `pos` and a string `reading`. */
+ private requireEntryTarget;
13
+ }
@@ -0,0 +1,102 @@
1
+ import { EditConnectionCostPlugin } from './base.js';
2
+ export class TargetedConnectionCostPlugin extends EditConnectionCostPlugin {
3
+ rules = [];
4
+ setUp(grammar, lexicon) {
5
+ if (!lexicon) {
6
+ throw new Error('TargetedConnectionCostPlugin requires the lexicon during setup');
7
+ }
8
+ const rawRules = this.settings.toObject().rules;
9
+ if (!Array.isArray(rawRules) || rawRules.length === 0) {
10
+ throw new Error('rules is undefined');
11
+ }
12
+ this.rules = rawRules.map((rule, index) => this.resolveRule(grammar, lexicon, rule, index + 1));
13
+ }
14
+ edit(grammar) {
15
+ for (const rule of this.rules) {
16
+ grammar.setConnectCost(rule.leftRightId, rule.rightLeftId, rule.cost);
17
+ }
18
+ }
19
+ resolveRule(grammar, lexicon, rule, ruleIndex) {
20
+ const parsedRule = this.requireConnectionRule(rule, ruleIndex);
21
+ const leftWordId = this.resolveWordId(grammar, lexicon, parsedRule.left, 'rule', ruleIndex, 'left');
22
+ const rightWordId = this.resolveWordId(grammar, lexicon, parsedRule.right, 'rule', ruleIndex, 'right');
23
+ return {
24
+ leftRightId: lexicon.getRightId(leftWordId),
25
+ rightLeftId: lexicon.getLeftId(rightWordId),
26
+ cost: parsedRule.cost,
27
+ };
28
+ }
29
+ resolveWordId(grammar, lexicon, target, ruleLabel, ruleIndex, side) {
30
+ if (typeof target.surface !== 'string' || target.surface === '') {
31
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} surface must be a non-empty string`);
32
+ }
33
+ if (typeof target.reading !== 'string' || target.reading === '') {
34
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} reading must be a non-empty string`);
35
+ }
36
+ const pos = this.normalizePos(target.pos, ruleLabel, ruleIndex, side);
37
+ const posId = grammar.getPartOfSpeechId(pos);
38
+ if (posId < 0) {
39
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} POS ${pos.join(',')} was not found in the loaded grammar`);
40
+ }
41
+ const wordId = lexicon.getWordId(target.surface, posId, target.reading);
42
+ if (wordId < 0) {
43
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} entry ${target.surface} (${pos.join(',')} / ${target.reading}) was not found in the loaded lexicon`);
44
+ }
45
+ return wordId;
46
+ }
47
+ normalizePos(pos, ruleLabel, ruleIndex, side) {
48
+ if (!Array.isArray(pos) || pos.length === 0) {
49
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} pos must be a non-empty string list`);
50
+ }
51
+ const normalized = pos.map((item) => {
52
+ if (typeof item !== 'string') {
53
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} pos must contain only strings`);
54
+ }
55
+ return item;
56
+ });
57
+ while (normalized.length < 6) {
58
+ normalized.push('*');
59
+ }
60
+ return normalized.slice(0, 6);
61
+ }
62
+ requireConnectionRule(rule, ruleIndex) {
63
+ if (typeof rule !== 'object' || rule === null) {
64
+ throw new Error(`rule ${ruleIndex} must be an object`);
65
+ }
66
+ const obj = rule;
67
+ const left = obj.left;
68
+ const right = obj.right;
69
+ const cost = obj.cost;
70
+ if (typeof cost !== 'number') {
71
+ throw new Error(`rule ${ruleIndex} cost must be a number`);
72
+ }
73
+ return {
74
+ left: this.requireEntryTarget(left, ruleIndex, 'left'),
75
+ right: this.requireEntryTarget(right, ruleIndex, 'right'),
76
+ cost,
77
+ };
78
+ }
79
+ requireEntryTarget(value, ruleIndex, side) {
80
+ if (typeof value !== 'object' || value === null) {
81
+ throw new Error(`rule ${ruleIndex} ${side} must be an object`);
82
+ }
83
+ const obj = value;
84
+ const surface = obj.surface;
85
+ const pos = obj.pos;
86
+ const reading = obj.reading;
87
+ if (typeof surface !== 'string') {
88
+ throw new Error(`rule ${ruleIndex} ${side} surface must be a string`);
89
+ }
90
+ if (!Array.isArray(pos)) {
91
+ throw new Error(`rule ${ruleIndex} ${side} pos must be a string list`);
92
+ }
93
+ if (typeof reading !== 'string') {
94
+ throw new Error(`rule ${ruleIndex} ${side} reading must be a string`);
95
+ }
96
+ return {
97
+ surface,
98
+ pos: pos,
99
+ reading,
100
+ };
101
+ }
102
+ }
@@ -1,6 +1,7 @@
1
1
  export { Plugin } from './base.js';
2
2
  export { EditConnectionCostPlugin } from './connection/base.js';
3
3
  export { InhibitConnectionPlugin } from './connection/inhibitConnectionPlugin.js';
4
+ export { TargetedConnectionCostPlugin } from './connection/targetedConnectionCostPlugin.js';
4
5
  export { MorphemeFormatterPlugin } from './formatter/base.js';
5
6
  export { SimpleMorphemeFormatter } from './formatter/simpleMorphemeFormatter.js';
6
7
  export { WordSegmentationFormatter } from './formatter/wordSegmentationFormatter.js';
@@ -1,6 +1,7 @@
1
1
  export { Plugin } from './base.js';
2
2
  export { EditConnectionCostPlugin } from './connection/base.js';
3
3
  export { InhibitConnectionPlugin } from './connection/inhibitConnectionPlugin.js';
4
+ export { TargetedConnectionCostPlugin } from './connection/targetedConnectionCostPlugin.js';
4
5
  export { MorphemeFormatterPlugin } from './formatter/base.js';
5
6
  export { SimpleMorphemeFormatter } from './formatter/simpleMorphemeFormatter.js';
6
7
  export { WordSegmentationFormatter } from './formatter/wordSegmentationFormatter.js';
@@ -1,6 +1,7 @@
1
1
  import { PathAnchor } from '../config/pathAnchor.js';
2
2
  import type { Settings } from '../config/settings.js';
3
3
  import type { Grammar } from '../dictionary/grammar.js';
4
+ import type { Lexicon } from '../dictionary/lexicon.js';
4
5
  import type { Plugin } from './base.js';
5
6
  import type { EditConnectionCostPlugin } from './connection/base.js';
6
7
  import type { MorphemeFormatterPlugin } from './formatter/base.js';
@@ -11,6 +12,10 @@ export interface LoadedPlugin<T extends Plugin> {
11
12
  plugin: T;
12
13
  className: string;
13
14
  }
15
+ type PluginConfig = {
16
+ className: string;
17
+ settings: Settings;
18
+ };
14
19
  export declare class PluginLoader {
15
20
  private readonly anchor;
16
21
  constructor(anchor?: PathAnchor);
@@ -19,27 +24,16 @@ export declare class PluginLoader {
19
24
  loadPathRewritePlugin(className: string, settings: Settings): Promise<LoadedPlugin<PathRewritePlugin>>;
20
25
  loadEditConnectionCostPlugin(className: string, settings: Settings): Promise<LoadedPlugin<EditConnectionCostPlugin>>;
21
26
  loadMorphemeFormatterPlugin(className: string, settings: Settings): Promise<LoadedPlugin<MorphemeFormatterPlugin>>;
22
- loadInputTextPlugins(configs: {
23
- className: string;
24
- settings: Settings;
25
- }[], grammar: Grammar): Promise<LoadedPlugin<InputTextPlugin>[]>;
26
- loadOovProviderPlugins(configs: {
27
- className: string;
28
- settings: Settings;
29
- }[], grammar: Grammar): Promise<LoadedPlugin<OovProviderPlugin>[]>;
30
- loadPathRewritePlugins(configs: {
31
- className: string;
32
- settings: Settings;
33
- }[], grammar: Grammar): Promise<LoadedPlugin<PathRewritePlugin>[]>;
34
- loadEditConnectionCostPlugins(configs: {
35
- className: string;
36
- settings: Settings;
37
- }[], grammar: Grammar): Promise<LoadedPlugin<EditConnectionCostPlugin>[]>;
27
+ loadInputTextPlugins(configs: PluginConfig[], grammar: Grammar): Promise<LoadedPlugin<InputTextPlugin>[]>;
28
+ loadOovProviderPlugins(configs: PluginConfig[], grammar: Grammar): Promise<LoadedPlugin<OovProviderPlugin>[]>;
29
+ loadPathRewritePlugins(configs: PluginConfig[], grammar: Grammar): Promise<LoadedPlugin<PathRewritePlugin>[]>;
30
+ loadEditConnectionCostPlugins(configs: PluginConfig[], grammar: Grammar, lexicon: Lexicon): Promise<LoadedPlugin<EditConnectionCostPlugin>[]>;
38
31
  private loadPlugin;
32
+ private loadConfiguredPlugins;
39
33
  private findPluginClass;
40
34
  private isPluginConstructor;
41
35
  private resolveClassSpecifier;
42
36
  private isPathLikeSpecifier;
43
- private isBuiltIn;
44
37
  private getBuiltIn;
45
38
  }
39
+ export {};
@@ -2,6 +2,7 @@ import { isAbsolute, resolve } from 'node:path';
2
2
  import { pathToFileURL } from 'node:url';
3
3
  import { PathAnchor } from '../config/pathAnchor.js';
4
4
  import { InhibitConnectionPlugin } from './connection/inhibitConnectionPlugin.js';
5
+ import { TargetedConnectionCostPlugin } from './connection/targetedConnectionCostPlugin.js';
5
6
  import { DefaultInputTextPlugin } from './inputText/defaultInputTextPlugin.js';
6
7
  import { IgnoreYomiganaPlugin } from './inputText/ignoreYomiganaPlugin.js';
7
8
  import { ProlongedSoundMarkInputTextPlugin } from './inputText/prolongedSoundMarkPlugin.js';
@@ -37,37 +38,19 @@ export class PluginLoader {
37
38
  return { plugin, className };
38
39
  }
39
40
  async loadInputTextPlugins(configs, grammar) {
40
- const results = [];
41
- for (const config of configs) {
42
- const loaded = await this.loadInputTextPlugin(config.className, config.settings);
43
- loaded.plugin.setUp(grammar);
44
- results.push(loaded);
45
- }
46
- return results;
41
+ return this.loadConfiguredPlugins(configs, grammar, (config) => this.loadInputTextPlugin(config.className, config.settings), (plugin) => plugin.setUp(grammar));
47
42
  }
48
43
  async loadOovProviderPlugins(configs, grammar) {
49
- const results = [];
50
- for (const config of configs) {
51
- const loaded = await this.loadOovProviderPlugin(config.className, config.settings);
52
- loaded.plugin.setUp(grammar);
53
- results.push(loaded);
54
- }
55
- return results;
44
+ return this.loadConfiguredPlugins(configs, grammar, (config) => this.loadOovProviderPlugin(config.className, config.settings), (plugin) => plugin.setUp(grammar));
56
45
  }
57
46
  async loadPathRewritePlugins(configs, grammar) {
58
- const results = [];
59
- for (const config of configs) {
60
- const loaded = await this.loadPathRewritePlugin(config.className, config.settings);
61
- loaded.plugin.setUp(grammar);
62
- results.push(loaded);
63
- }
64
- return results;
47
+ return this.loadConfiguredPlugins(configs, grammar, (config) => this.loadPathRewritePlugin(config.className, config.settings), (plugin) => plugin.setUp(grammar));
65
48
  }
66
- async loadEditConnectionCostPlugins(configs, grammar) {
49
+ async loadEditConnectionCostPlugins(configs, grammar, lexicon) {
67
50
  const results = [];
68
51
  for (const config of configs) {
69
52
  const loaded = await this.loadEditConnectionCostPlugin(config.className, config.settings);
70
- loaded.plugin.setUp(grammar);
53
+ await loaded.plugin.setUp(grammar, lexicon);
71
54
  results.push(loaded);
72
55
  }
73
56
  return results;
@@ -75,8 +58,9 @@ export class PluginLoader {
75
58
  async loadPlugin(className, settings) {
76
59
  try {
77
60
  let PluginClass;
78
- if (this.isBuiltIn(className)) {
79
- PluginClass = this.getBuiltIn(className);
61
+ const builtInClass = this.getBuiltIn(className);
62
+ if (builtInClass !== null) {
63
+ PluginClass = builtInClass;
80
64
  }
81
65
  else {
82
66
  const classSpecifier = await this.resolveClassSpecifier(className);
@@ -91,6 +75,15 @@ export class PluginLoader {
91
75
  throw new Error(`Failed to load plugin ${className}: ${error instanceof Error ? error.message : String(error)}`);
92
76
  }
93
77
  }
78
+ async loadConfiguredPlugins(configs, grammar, loadPlugin, setUp) {
79
+ const results = [];
80
+ for (const config of configs) {
81
+ const loaded = await loadPlugin(config);
82
+ await setUp(loaded.plugin, grammar);
83
+ results.push(loaded);
84
+ }
85
+ return results;
86
+ }
94
87
  findPluginClass(module, className) {
95
88
  if (typeof module === 'object' && module !== null) {
96
89
  const obj = module;
@@ -138,9 +131,6 @@ export class PluginLoader {
138
131
  className.startsWith('..\\') ||
139
132
  isAbsolute(className));
140
133
  }
141
- isBuiltIn(name) {
142
- return (name in BUILT_IN_PLUGINS || name.split('.').pop() in BUILT_IN_PLUGINS);
143
- }
144
134
  getBuiltIn(name) {
145
135
  if (name in BUILT_IN_PLUGINS) {
146
136
  return BUILT_IN_PLUGINS[name];
@@ -149,7 +139,7 @@ export class PluginLoader {
149
139
  if (shortName in BUILT_IN_PLUGINS) {
150
140
  return BUILT_IN_PLUGINS[shortName];
151
141
  }
152
- throw new Error(`Plugin ${name} not found in built-ins`);
142
+ return null;
153
143
  }
154
144
  }
155
145
  const BUILT_IN_PLUGINS = {
@@ -163,6 +153,7 @@ const BUILT_IN_PLUGINS = {
163
153
  JoinKatakanaOovPlugin,
164
154
  JoinNumericPlugin,
165
155
  InhibitConnectionPlugin,
156
+ TargetedConnectionCostPlugin,
166
157
  'com.worksap.nlp.sudachi.DefaultInputTextPlugin': DefaultInputTextPlugin,
167
158
  'com.worksap.nlp.sudachi.IgnoreYomiganaPlugin': IgnoreYomiganaPlugin,
168
159
  'com.worksap.nlp.sudachi.ProlongedSoundMarkInputTextPlugin': ProlongedSoundMarkInputTextPlugin,
@@ -173,4 +164,5 @@ const BUILT_IN_PLUGINS = {
173
164
  'com.worksap.nlp.sudachi.JoinKatakanaOovPlugin': JoinKatakanaOovPlugin,
174
165
  'com.worksap.nlp.sudachi.JoinNumericPlugin': JoinNumericPlugin,
175
166
  'com.worksap.nlp.sudachi.InhibitConnectionPlugin': InhibitConnectionPlugin,
167
+ 'com.worksap.nlp.sudachi.TargetedConnectionCostPlugin': TargetedConnectionCostPlugin,
176
168
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sudachi-ts",
3
- "version": "0.1.21",
3
+ "version": "0.1.22",
4
4
  "description": "TypeScript port of Sudachi morphological analyzer for Japanese text",
5
5
  "keywords": [
6
6
  "morphological-analyzer",