sudachi-ts 0.1.20-beta.7 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,10 +12,10 @@ TypeScript port of [Sudachi](https://github.com/WorksApplications/Sudachi) Japan
12
12
  - **Binary Dictionary Compatibility**: Load and use pre-built Sudachi dictionaries
13
13
  - **Dynamic Plugin System**: Extensible architecture with runtime plugin loading
14
14
  - **Dictionary Building**: Complete CSV to binary dictionary conversion
15
- - **Sentence Detection**: Multi-sentence text processing
16
- - **UTF-8 Handling**: Proper Japanese text normalization and character encoding
17
- - **POS Matching**: Flexible part-of-speech filtering and matching
18
- - **Counter Alias Recovery**: Resolves numeric kana counters such as `1こ` to the canonical counter lattice before best-path selection
15
+ - **Sentence Detection**: Multi-sentence text processing
16
+ - **UTF-8 Handling**: Proper Japanese text normalization and character encoding
17
+ - **POS Matching**: Flexible part-of-speech filtering and matching
18
+ - **Counter Alias Recovery**: Resolves numeric kana counters such as `1こ` to the canonical counter lattice before best-path selection
19
19
 
20
20
  ## Requirements
21
21
 
@@ -106,7 +106,7 @@ const config = await loadConfig('./sudachi.json');
106
106
  const dict = Dictionary.create();
107
107
  ```
108
108
 
109
- Example `sudachi.json`:
109
+ Example `sudachi.json`:
110
110
 
111
111
  ```json
112
112
  {
@@ -121,31 +121,31 @@ Example `sudachi.json`:
121
121
  }
122
122
  }
123
123
  ]
124
- }
125
- ```
126
-
127
- For non-absolute file references in config (dictionary files, plugin module paths,
128
- and built-in plugin file settings), Sudachi-TS tries paths relative to the config
129
- file first, then relative to the current working directory.
130
-
131
- By default, Sudachi-TS enables a built-in compound-particle lexicon
132
- (`"enableDefaultCompoundParticles": true`) so forms such as `かも`, `のか`,
133
- and `だから` are tokenized as single morphemes. Set it to `false` to disable:
124
+ }
125
+ ```
126
+
127
+ For non-absolute file references in config (dictionary files, plugin module paths,
128
+ and built-in plugin file settings), Sudachi-TS tries paths relative to the config
129
+ file first, then relative to the current working directory.
130
+
131
+ By default, Sudachi-TS enables a built-in compound-particle lexicon
132
+ (`"enableDefaultCompoundParticles": true`) so forms such as `かも`, `のか`,
133
+ and `だから` are tokenized as single morphemes. Set it to `false` to disable:
134
134
 
135
135
  ```json
136
- {
137
- "enableDefaultCompoundParticles": false
138
- }
139
- ```
140
-
141
- The default OOV plugin stack also injects counter aliases in numeric contexts,
142
- so kana counters such as `りんごを1こください。` are analyzed as
143
- `りんご / を / 1 / こ / ください / 。` with the counter normalized to `個`
144
- instead of falling through to unrelated dictionary entries.
145
-
146
- ## Working with Morphemes
147
-
148
- Access detailed morpheme information:
136
+ {
137
+ "enableDefaultCompoundParticles": false
138
+ }
139
+ ```
140
+
141
+ The default OOV plugin stack also injects counter aliases in numeric contexts,
142
+ so kana counters such as `りんごを1こください。` are analyzed as
143
+ `りんご / を / 1 / こ / ください / 。` with the counter normalized to `個`
144
+ instead of falling through to unrelated dictionary entries.
145
+
146
+ ## Working with Morphemes
147
+
148
+ Access detailed morpheme information:
149
149
 
150
150
  ```typescript
151
151
  const morpheme = result[0];
@@ -171,34 +171,34 @@ console.log(morpheme.end());
171
171
  console.log(morpheme.length());
172
172
 
173
173
  // Check morpheme properties
174
- console.log(morpheme.isOov()); // True if out-of-vocabulary
175
- ```
176
-
177
- ## Public Dictionary Access
178
-
179
- `DictionaryFactory` returns a public `Dictionary` that now exposes stable
180
- dictionary metadata APIs without requiring internal imports.
181
-
182
- ```typescript
183
- import { DictionaryFactory } from 'sudachi-ts';
184
-
185
- const dictionary = await new DictionaryFactory().create('./sudachi.json');
186
-
187
- const grammar = dictionary.getGrammar();
188
- const lexicon = dictionary.getLexicon();
189
-
190
- const kyotoId = lexicon.getWordId('京都', 3, 'キョウト');
191
- const kyotoInfo = lexicon.getWordInfo(kyotoId);
192
-
193
- console.log(grammar.getPartOfSpeechString(kyotoInfo.getPOSId()));
194
- console.log(kyotoInfo.getSynonymGroupIds());
195
- ```
196
-
197
- When user dictionaries are configured, `dictionary.getLexicon()` exposes the
198
- merged lexicon view used by tokenization, so downstream plugins can look up both
199
- system and user dictionary entries through the same public API.
200
-
201
- ## Splitting Morphemes
174
+ console.log(morpheme.isOov()); // True if out-of-vocabulary
175
+ ```
176
+
177
+ ## Public Dictionary Access
178
+
179
+ `DictionaryFactory` returns a public `Dictionary` that now exposes stable
180
+ dictionary metadata APIs without requiring internal imports.
181
+
182
+ ```typescript
183
+ import { DictionaryFactory } from 'sudachi-ts';
184
+
185
+ const dictionary = await new DictionaryFactory().create('./sudachi.json');
186
+
187
+ const grammar = dictionary.getGrammar();
188
+ const lexicon = dictionary.getLexicon();
189
+
190
+ const kyotoId = lexicon.getWordId('京都', 3, 'キョウト');
191
+ const kyotoInfo = lexicon.getWordInfo(kyotoId);
192
+
193
+ console.log(grammar.getPartOfSpeechString(kyotoInfo.getPOSId()));
194
+ console.log(kyotoInfo.getSynonymGroupIds());
195
+ ```
196
+
197
+ When user dictionaries are configured, `dictionary.getLexicon()` exposes the
198
+ merged lexicon view used by tokenization, so downstream plugins can look up both
199
+ system and user dictionary entries through the same public API.
200
+
201
+ ## Splitting Morphemes
202
202
 
203
203
  Use the split method to change granularity:
204
204
 
@@ -221,18 +221,18 @@ import { SentenceDetector } from 'sudachi-ts/sentdetect/sentenceDetector.js';
221
221
 
222
222
  const sentences = tokenizer.tokenizeSentences('東京都は日本の首都です。大阪は商業都市です。');
223
223
 
224
- for (const sentence of sentences) {
225
- console.log('--- Sentence ---');
226
- for (const morpheme of sentence) {
227
- console.log(morpheme.surface());
228
- }
229
- }
230
- ```
231
-
232
- `tokenizeSentences(...)` treats standalone quoted dialogue endings (for example
233
- `「...!」`) as sentence boundaries, but keeps quoted speech attached to following
234
- reporting clauses such as `「...。」と言いました。`. It also skips leading
235
- inter-sentence whitespace such as newlines before tokenization.
224
+ for (const sentence of sentences) {
225
+ console.log('--- Sentence ---');
226
+ for (const morpheme of sentence) {
227
+ console.log(morpheme.surface());
228
+ }
229
+ }
230
+ ```
231
+
232
+ `tokenizeSentences(...)` treats standalone quoted dialogue endings (for example
233
+ `「...!」`) as sentence boundaries, but keeps quoted speech attached to following
234
+ reporting clauses such as `「...。」と言いました。`. It also skips leading
235
+ inter-sentence whitespace such as newlines before tokenization.
236
236
 
237
237
  Lazy sentence processing for streaming:
238
238
 
@@ -305,61 +305,26 @@ const plugin = await loader.loadInputTextPlugin(
305
305
 
306
306
  See [PLUGINS.md](./PLUGINS.md) for a detailed plugin development guide.
307
307
 
308
- Quick local comparison for the PoC token chunker plugin:
309
-
310
- ```bash
311
- npm exec tsx examples/token-chunker-plugin.ts /path/to/system.dic "東京大学"
312
- ```
313
-
314
- This example prints each token as `surface/reading` so the chunking impact on
315
- readings is visible in the baseline vs plugin outputs.
316
- `TokenChunkerPlugin` is designed and validated against the full Sudachi system
317
- dictionary (`system_full.dic` / `system.dic`), so prefer full-dictionary checks
318
- when adding or tuning chunk rules.
319
- `TokenChunkerPlugin` requires `enableDefaultCompoundParticles: true`. Dictionary
320
- creation throws an error when this plugin is configured with default compound
321
- particles disabled.
322
- `TokenChunkerPlugin` is intended for `SplitMode.C` tokenization; calling
323
- `tokenize(SplitMode.A, ...)` or `tokenize(SplitMode.B, ...)` with this plugin
324
- enabled throws an error.
325
- When the lattice already contains a lexicalized compound candidate, the chunker
326
- also prefers learner-facing noun compounds such as `学校` over split analyses
327
- like `学` + `校`.
328
- The chunker also handles polite progressive colloquial forms where `て/で` is
329
- an auxiliary (`てる/でる`) such as `残ってます` and `残ってますよ`, plus
330
- polite colloquial contraction forms like `太っちゃいます` and
331
- `太っちゃいますよ`, and colloquial `〜てく` past contractions like
332
- `持ってった`, colloquial `〜ておく` past contractions like `やめといた`, plus discourse chunks like `だなって` and contractions like
333
- `してんだ`, `あっけど`, particle chunks like `とか`, sentence-final turns like `いいよな`, copula quote
334
- spans like `ヒマだって`, and quoted reason clauses like `言ってたし`. It also chunks causative auxiliaries such as
335
- `打たせる` / `内させる` into a single learner-facing token, along with
336
- polite connective forms such as `込めまして`, negative connective forms such as
337
- `遣わなくて`, lexicalized adverbials such as `別に`, conversational turns such as
338
- `いいよ`, and causative te-forms such as `させて`. For learner-facing output it
339
- also prefers more natural alternate dictionary readings when the lattice already
340
- contains them, such as `明日` -> `アシタ`, `明後日` -> `アサッテ`, and
341
- `私` -> `ワタシ`, and it supports additional `preferredReadings` overrides
342
- via plugin settings using entries like `"私=ワタシ"`. It also normalizes
343
- mixed-script weekday compounds such as `火よう日` -> `カヨウビ`.
344
- The core tokenizer also rewrites sentence-ending ambiguities such as
345
- `ね | こと | ね` into `ねこ | と | ね` when the lattice supports that path.
346
-
347
- ## Dictionary Building
308
+
309
+ The core tokenizer also rewrites sentence-ending ambiguities such as
310
+ `ね | こと | ね` into `ねこ | と | ね` when the lattice supports that path.
311
+
312
+ ## Dictionary Building
348
313
 
349
314
  Build binary dictionaries from CSV source:
350
315
 
351
- ```typescript
352
- import { systemBuilder } from 'sudachi-ts/dictionary-build';
353
-
354
- const builder = systemBuilder();
355
-
356
- // Add lexicon entries from CSV
357
- await builder.matrix(matrixDefContents);
358
- await builder.lexicon(lexiconCsvContents, 'lexicon.csv');
359
-
360
- // Build binary dictionary
361
- const { buffer } = await builder.build();
362
- ```
316
+ ```typescript
317
+ import { systemBuilder } from 'sudachi-ts/dictionary-build';
318
+
319
+ const builder = systemBuilder();
320
+
321
+ // Add lexicon entries from CSV
322
+ await builder.matrix(matrixDefContents);
323
+ await builder.lexicon(lexiconCsvContents, 'lexicon.csv');
324
+
325
+ // Build binary dictionary
326
+ const { buffer } = await builder.build();
327
+ ```
363
328
 
364
329
  CSV format:
365
330
 
@@ -395,23 +360,23 @@ See [CONFIG.md](./CONFIG.md) for detailed configuration options.
395
360
 
396
361
  ## Development
397
362
 
398
- ```bash
399
- # Clone repository
400
- git clone https://github.com/your-org/sudachi-ts.git
401
- cd sudachi-ts
402
-
403
- # Install dependencies
404
- npm install
405
-
406
- # Type check
407
- npm run typecheck
408
-
409
- # Run tests
410
- npm test
411
-
412
- # Lint
413
- npm run check:fix
414
- ```
363
+ ```bash
364
+ # Clone repository
365
+ git clone https://github.com/your-org/sudachi-ts.git
366
+ cd sudachi-ts
367
+
368
+ # Install dependencies
369
+ npm install
370
+
371
+ # Type check
372
+ npm run typecheck
373
+
374
+ # Run tests
375
+ npm test
376
+
377
+ # Lint
378
+ npm run check:fix
379
+ ```
415
380
 
416
381
  ## Architecture
417
382
 
@@ -6,12 +6,6 @@ import { PluginLoader } from '../plugins/loader.js';
6
6
  import { BinaryDictionary } from './binaryDictionary.js';
7
7
  import { loadDefaultCompoundLexicon } from './defaultCompoundLexicon.js';
8
8
  import { LexiconSet } from './lexiconSet.js';
9
- function isTokenChunkerPlugin(className) {
10
- if (className === 'com.worksap.nlp.sudachi.TokenChunkerPlugin') {
11
- return true;
12
- }
13
- return className.split('.').pop() === 'TokenChunkerPlugin';
14
- }
15
9
  export class DictionaryFactory {
16
10
  async create(configPath, customConfig) {
17
11
  const config = customConfig || (await loadConfig(configPath));
@@ -52,6 +46,15 @@ export class DictionaryFactory {
52
46
  }
53
47
  const loader = new PluginLoader(anchor);
54
48
  const defaultConfig = Config.parse(DEFAULT_CONFIG_JSON).setAnchor(anchor);
49
+ let editConnectionPluginConfs = config.getPlugins('editConnectionCostPlugin');
50
+ if (!editConnectionPluginConfs || editConnectionPluginConfs.length === 0) {
51
+ editConnectionPluginConfs = defaultConfig.getPlugins('editConnectionCostPlugin');
52
+ }
53
+ if (editConnectionPluginConfs && editConnectionPluginConfs.length > 0) {
54
+ for (const loaded of await loader.loadEditConnectionCostPlugins(editConnectionPluginConfs, grammar, lexicon)) {
55
+ loaded.plugin.edit(grammar);
56
+ }
57
+ }
55
58
  let inputTextPluginConfs = config.getPlugins('inputTextPlugin');
56
59
  if (!inputTextPluginConfs || inputTextPluginConfs.length === 0) {
57
60
  inputTextPluginConfs = defaultConfig.getPlugins('inputTextPlugin');
@@ -82,10 +85,6 @@ export class DictionaryFactory {
82
85
  if (!pathRewritePluginConfs || pathRewritePluginConfs.length === 0) {
83
86
  pathRewritePluginConfs = defaultConfig.getPlugins('pathRewritePlugin');
84
87
  }
85
- if (!enableDefaultCompoundParticles &&
86
- (pathRewritePluginConfs || []).some((conf) => isTokenChunkerPlugin(conf.className))) {
87
- throw new Error('TokenChunkerPlugin is only compatible when enableDefaultCompoundParticles is true.');
88
- }
89
88
  const pathRewritePlugins = (await loader.loadPathRewritePlugins(pathRewritePluginConfs || [], grammar)).map((p) => p.plugin);
90
89
  return new Dictionary(grammar, lexicon, inputTextPlugins, oovProviderPlugins, pathRewritePlugins);
91
90
  }
@@ -24,7 +24,7 @@ export { DEPTH, MAX_COMPONENT_LENGTH, POS } from './dictionary/pos.js';
24
24
  export { PartialPOS, PosMatcher } from './dictionary/posMatcher.js';
25
25
  export { WordInfo } from './dictionary/wordInfo.js';
26
26
  export { DoubleArray } from './dictionary-build/doubleArray.js';
27
- export { EditConnectionCostPlugin, InputTextPlugin, type LoadedPlugin, MorphemeFormatterPlugin, OovProviderPlugin, PathRewritePlugin, Plugin, PluginLoader, } from './plugins/index.js';
27
+ export { EditConnectionCostPlugin, InputTextPlugin, type LoadedPlugin, MorphemeFormatterPlugin, OovProviderPlugin, PathRewritePlugin, Plugin, PluginLoader, TargetedConnectionCostPlugin, } from './plugins/index.js';
28
28
  export type { NonBreakChecker } from './sentdetect/sentenceDetector.js';
29
29
  export { DEFAULT_LIMIT, SentenceDetector, } from './sentdetect/sentenceDetector.js';
30
30
  export { applyMask, dic, dicIdMask, MAX_DIC_ID, MAX_WORD_ID, make, word, } from './utils/wordId.js';
@@ -15,7 +15,7 @@ export { DEPTH, MAX_COMPONENT_LENGTH, POS } from './dictionary/pos.js';
15
15
  export { PartialPOS, PosMatcher } from './dictionary/posMatcher.js';
16
16
  export { WordInfo } from './dictionary/wordInfo.js';
17
17
  export { DoubleArray } from './dictionary-build/doubleArray.js';
18
- export { EditConnectionCostPlugin, InputTextPlugin, MorphemeFormatterPlugin, OovProviderPlugin, PathRewritePlugin, Plugin, PluginLoader, } from './plugins/index.js';
18
+ export { EditConnectionCostPlugin, InputTextPlugin, MorphemeFormatterPlugin, OovProviderPlugin, PathRewritePlugin, Plugin, PluginLoader, TargetedConnectionCostPlugin, } from './plugins/index.js';
19
19
  export { DEFAULT_LIMIT, SentenceDetector, } from './sentdetect/sentenceDetector.js';
20
20
  export { applyMask, dic, dicIdMask, MAX_DIC_ID, MAX_WORD_ID, make, word, } from './utils/wordId.js';
21
21
  export { addNth, hasNth, MAX_LENGTH, nth } from './utils/wordMask.js';
@@ -1,4 +1,4 @@
1
- import type { Settings } from '../config/settings.js';
1
+ import { Settings } from '../config/settings.js';
2
2
  export declare abstract class Plugin {
3
3
  protected settings: Settings;
4
4
  constructor();
@@ -1,7 +1,8 @@
1
+ import { Settings } from '../config/settings.js';
1
2
  export class Plugin {
2
3
  settings;
3
4
  constructor() {
4
- this.settings = { getString: () => null };
5
+ this.settings = Settings.empty();
5
6
  }
6
7
  setSettings(settings) {
7
8
  this.settings = settings;
@@ -1,7 +1,8 @@
1
1
  import type { Grammar } from '../../dictionary/grammar.js';
2
+ import type { Lexicon } from '../../dictionary/lexicon.js';
2
3
  import { Plugin } from '../base.js';
3
4
  export declare abstract class EditConnectionCostPlugin extends Plugin {
4
- setUp(_grammar: Grammar): void;
5
+ setUp(_grammar: Grammar, _lexicon?: Lexicon): void;
5
6
  abstract edit(grammar: Grammar): void;
6
7
  inhibitConnection(grammar: Grammar, left: number, right: number): void;
7
8
  }
@@ -1,6 +1,6 @@
1
1
  import { Plugin } from '../base.js';
2
2
  export class EditConnectionCostPlugin extends Plugin {
3
- setUp(_grammar) { }
3
+ setUp(_grammar, _lexicon) { }
4
4
  inhibitConnection(grammar, left, right) {
5
5
  grammar.setConnectCost(left, right, grammar.INHIBITED_CONNECTION);
6
6
  }
@@ -0,0 +1,13 @@
1
+ import type { Grammar } from '../../dictionary/grammar.js';
2
+ import type { Lexicon } from '../../dictionary/lexicon.js';
3
+ import { EditConnectionCostPlugin } from './base.js';
4
+ export declare class TargetedConnectionCostPlugin extends EditConnectionCostPlugin {
5
+ private rules;
6
+ setUp(grammar: Grammar, lexicon?: Lexicon): void;
7
+ edit(grammar: Grammar): void;
8
+ private resolveRule;
9
+ private resolveWordId;
10
+ private normalizePos;
11
+ private requireConnectionRule;
12
+ private requireEntryTarget;
13
+ }
@@ -0,0 +1,102 @@
1
+ import { EditConnectionCostPlugin } from './base.js';
2
+ export class TargetedConnectionCostPlugin extends EditConnectionCostPlugin {
3
+ rules = [];
4
+ setUp(grammar, lexicon) {
5
+ if (!lexicon) {
6
+ throw new Error('TargetedConnectionCostPlugin requires the lexicon during setup');
7
+ }
8
+ const rawRules = this.settings.toObject().rules;
9
+ if (!Array.isArray(rawRules) || rawRules.length === 0) {
10
+ throw new Error('rules is undefined');
11
+ }
12
+ this.rules = rawRules.map((rule, index) => this.resolveRule(grammar, lexicon, rule, index + 1));
13
+ }
14
+ edit(grammar) {
15
+ for (const rule of this.rules) {
16
+ grammar.setConnectCost(rule.leftRightId, rule.rightLeftId, rule.cost);
17
+ }
18
+ }
19
+ resolveRule(grammar, lexicon, rule, ruleIndex) {
20
+ const parsedRule = this.requireConnectionRule(rule, ruleIndex);
21
+ const leftWordId = this.resolveWordId(grammar, lexicon, parsedRule.left, 'rule', ruleIndex, 'left');
22
+ const rightWordId = this.resolveWordId(grammar, lexicon, parsedRule.right, 'rule', ruleIndex, 'right');
23
+ return {
24
+ leftRightId: lexicon.getRightId(leftWordId),
25
+ rightLeftId: lexicon.getLeftId(rightWordId),
26
+ cost: parsedRule.cost,
27
+ };
28
+ }
29
+ resolveWordId(grammar, lexicon, target, ruleLabel, ruleIndex, side) {
30
+ if (typeof target.surface !== 'string' || target.surface === '') {
31
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} surface must be a non-empty string`);
32
+ }
33
+ if (typeof target.reading !== 'string' || target.reading === '') {
34
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} reading must be a non-empty string`);
35
+ }
36
+ const pos = this.normalizePos(target.pos, ruleLabel, ruleIndex, side);
37
+ const posId = grammar.getPartOfSpeechId(pos);
38
+ if (posId < 0) {
39
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} POS ${pos.join(',')} was not found in the loaded grammar`);
40
+ }
41
+ const wordId = lexicon.getWordId(target.surface, posId, target.reading);
42
+ if (wordId < 0) {
43
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} entry ${target.surface} (${pos.join(',')} / ${target.reading}) was not found in the loaded lexicon`);
44
+ }
45
+ return wordId;
46
+ }
47
+ normalizePos(pos, ruleLabel, ruleIndex, side) {
48
+ if (!Array.isArray(pos) || pos.length === 0) {
49
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} pos must be a non-empty string list`);
50
+ }
51
+ const normalized = pos.map((item) => {
52
+ if (typeof item !== 'string') {
53
+ throw new Error(`${ruleLabel} ${ruleIndex} ${side} pos must contain only strings`);
54
+ }
55
+ return item;
56
+ });
57
+ while (normalized.length < 6) {
58
+ normalized.push('*');
59
+ }
60
+ return normalized.slice(0, 6);
61
+ }
62
+ requireConnectionRule(rule, ruleIndex) {
63
+ if (typeof rule !== 'object' || rule === null) {
64
+ throw new Error(`rule ${ruleIndex} must be an object`);
65
+ }
66
+ const obj = rule;
67
+ const left = obj.left;
68
+ const right = obj.right;
69
+ const cost = obj.cost;
70
+ if (typeof cost !== 'number') {
71
+ throw new Error(`rule ${ruleIndex} cost must be a number`);
72
+ }
73
+ return {
74
+ left: this.requireEntryTarget(left, ruleIndex, 'left'),
75
+ right: this.requireEntryTarget(right, ruleIndex, 'right'),
76
+ cost,
77
+ };
78
+ }
79
+ requireEntryTarget(value, ruleIndex, side) {
80
+ if (typeof value !== 'object' || value === null) {
81
+ throw new Error(`rule ${ruleIndex} ${side} must be an object`);
82
+ }
83
+ const obj = value;
84
+ const surface = obj.surface;
85
+ const pos = obj.pos;
86
+ const reading = obj.reading;
87
+ if (typeof surface !== 'string') {
88
+ throw new Error(`rule ${ruleIndex} ${side} surface must be a string`);
89
+ }
90
+ if (!Array.isArray(pos)) {
91
+ throw new Error(`rule ${ruleIndex} ${side} pos must be a string list`);
92
+ }
93
+ if (typeof reading !== 'string') {
94
+ throw new Error(`rule ${ruleIndex} ${side} reading must be a string`);
95
+ }
96
+ return {
97
+ surface,
98
+ pos: pos,
99
+ reading,
100
+ };
101
+ }
102
+ }
@@ -1,6 +1,7 @@
1
1
  export { Plugin } from './base.js';
2
2
  export { EditConnectionCostPlugin } from './connection/base.js';
3
3
  export { InhibitConnectionPlugin } from './connection/inhibitConnectionPlugin.js';
4
+ export { TargetedConnectionCostPlugin } from './connection/targetedConnectionCostPlugin.js';
4
5
  export { MorphemeFormatterPlugin } from './formatter/base.js';
5
6
  export { SimpleMorphemeFormatter } from './formatter/simpleMorphemeFormatter.js';
6
7
  export { WordSegmentationFormatter } from './formatter/wordSegmentationFormatter.js';
@@ -17,4 +18,3 @@ export { SimpleOovProviderPlugin } from './oov/simpleOovProviderPlugin.js';
17
18
  export { PathRewritePlugin } from './pathRewrite/base.js';
18
19
  export { JoinKatakanaOovPlugin } from './pathRewrite/joinKatakanaOovPlugin.js';
19
20
  export { JoinNumericPlugin } from './pathRewrite/joinNumericPlugin.js';
20
- export { TokenChunkerPlugin } from './pathRewrite/tokenChunkerPlugin.js';
@@ -1,6 +1,7 @@
1
1
  export { Plugin } from './base.js';
2
2
  export { EditConnectionCostPlugin } from './connection/base.js';
3
3
  export { InhibitConnectionPlugin } from './connection/inhibitConnectionPlugin.js';
4
+ export { TargetedConnectionCostPlugin } from './connection/targetedConnectionCostPlugin.js';
4
5
  export { MorphemeFormatterPlugin } from './formatter/base.js';
5
6
  export { SimpleMorphemeFormatter } from './formatter/simpleMorphemeFormatter.js';
6
7
  export { WordSegmentationFormatter } from './formatter/wordSegmentationFormatter.js';
@@ -17,4 +18,3 @@ export { SimpleOovProviderPlugin } from './oov/simpleOovProviderPlugin.js';
17
18
  export { PathRewritePlugin } from './pathRewrite/base.js';
18
19
  export { JoinKatakanaOovPlugin } from './pathRewrite/joinKatakanaOovPlugin.js';
19
20
  export { JoinNumericPlugin } from './pathRewrite/joinNumericPlugin.js';
20
- export { TokenChunkerPlugin } from './pathRewrite/tokenChunkerPlugin.js';
@@ -1,6 +1,7 @@
1
1
  import { PathAnchor } from '../config/pathAnchor.js';
2
2
  import type { Settings } from '../config/settings.js';
3
3
  import type { Grammar } from '../dictionary/grammar.js';
4
+ import type { Lexicon } from '../dictionary/lexicon.js';
4
5
  import type { Plugin } from './base.js';
5
6
  import type { EditConnectionCostPlugin } from './connection/base.js';
6
7
  import type { MorphemeFormatterPlugin } from './formatter/base.js';
@@ -11,6 +12,10 @@ export interface LoadedPlugin<T extends Plugin> {
11
12
  plugin: T;
12
13
  className: string;
13
14
  }
15
+ type PluginConfig = {
16
+ className: string;
17
+ settings: Settings;
18
+ };
14
19
  export declare class PluginLoader {
15
20
  private readonly anchor;
16
21
  constructor(anchor?: PathAnchor);
@@ -19,27 +24,16 @@ export declare class PluginLoader {
19
24
  loadPathRewritePlugin(className: string, settings: Settings): Promise<LoadedPlugin<PathRewritePlugin>>;
20
25
  loadEditConnectionCostPlugin(className: string, settings: Settings): Promise<LoadedPlugin<EditConnectionCostPlugin>>;
21
26
  loadMorphemeFormatterPlugin(className: string, settings: Settings): Promise<LoadedPlugin<MorphemeFormatterPlugin>>;
22
- loadInputTextPlugins(configs: {
23
- className: string;
24
- settings: Settings;
25
- }[], grammar: Grammar): Promise<LoadedPlugin<InputTextPlugin>[]>;
26
- loadOovProviderPlugins(configs: {
27
- className: string;
28
- settings: Settings;
29
- }[], grammar: Grammar): Promise<LoadedPlugin<OovProviderPlugin>[]>;
30
- loadPathRewritePlugins(configs: {
31
- className: string;
32
- settings: Settings;
33
- }[], grammar: Grammar): Promise<LoadedPlugin<PathRewritePlugin>[]>;
34
- loadEditConnectionCostPlugins(configs: {
35
- className: string;
36
- settings: Settings;
37
- }[], grammar: Grammar): Promise<LoadedPlugin<EditConnectionCostPlugin>[]>;
27
+ loadInputTextPlugins(configs: PluginConfig[], grammar: Grammar): Promise<LoadedPlugin<InputTextPlugin>[]>;
28
+ loadOovProviderPlugins(configs: PluginConfig[], grammar: Grammar): Promise<LoadedPlugin<OovProviderPlugin>[]>;
29
+ loadPathRewritePlugins(configs: PluginConfig[], grammar: Grammar): Promise<LoadedPlugin<PathRewritePlugin>[]>;
30
+ loadEditConnectionCostPlugins(configs: PluginConfig[], grammar: Grammar, lexicon: Lexicon): Promise<LoadedPlugin<EditConnectionCostPlugin>[]>;
38
31
  private loadPlugin;
32
+ private loadConfiguredPlugins;
39
33
  private findPluginClass;
40
34
  private isPluginConstructor;
41
35
  private resolveClassSpecifier;
42
36
  private isPathLikeSpecifier;
43
- private isBuiltIn;
44
37
  private getBuiltIn;
45
38
  }
39
+ export {};