flappa-doormal 2.19.0 → 2.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +63 -11
- package/README.md +383 -11
- package/dist/index.d.mts +440 -132
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +2 -4445
- package/dist/index.mjs.map +1 -1
- package/dist/mcp/server.d.mts +1 -0
- package/dist/mcp/server.mjs +156 -0
- package/dist/mcp/server.mjs.map +1 -0
- package/dist/segmentation-advisor-D375TL8-.mjs +6128 -0
- package/dist/segmentation-advisor-D375TL8-.mjs.map +1 -0
- package/package.json +18 -4
package/dist/index.d.mts
CHANGED
|
@@ -122,6 +122,152 @@ type BreakpointRule = PageRangeConstraintWithExclude & {
|
|
|
122
122
|
*/
|
|
123
123
|
type Breakpoint = string | BreakpointRule;
|
|
124
124
|
//#endregion
|
|
125
|
+
//#region src/types/dictionary.d.ts
|
|
126
|
+
/**
|
|
127
|
+
* Dictionary v2 profile types for Shamela-style Arabic dictionary segmentation.
|
|
128
|
+
*/
|
|
129
|
+
type DictionaryHeadingClass = 'chapter' | 'entry' | 'marker' | 'cluster';
|
|
130
|
+
type DictionaryHeadingScanClass = DictionaryHeadingClass | 'noise';
|
|
131
|
+
type DictionarySegmentKind = 'chapter' | 'entry' | 'marker';
|
|
132
|
+
type DictionarySegmentMeta = {
|
|
133
|
+
kind: DictionarySegmentKind;
|
|
134
|
+
lemma?: string;
|
|
135
|
+
};
|
|
136
|
+
/** Family key used by diagnostics and authoring tools. */
|
|
137
|
+
type DictionaryFamilyUse = DictionaryFamily['use'];
|
|
138
|
+
/** Rejection reason emitted by dictionary-profile diagnostics. */
|
|
139
|
+
type DictionaryDiagnosticReason = 'qualifierTail' | 'structuralLeak' | 'intro' | 'authorityIntro' | 'stopLemma' | 'previousWord' | 'previousChar' | 'pageContinuation';
|
|
140
|
+
type DictionaryGate = {
|
|
141
|
+
use: 'headingText';
|
|
142
|
+
match: string;
|
|
143
|
+
fuzzy?: boolean;
|
|
144
|
+
} | {
|
|
145
|
+
use: 'headingToken';
|
|
146
|
+
token: 'bab' | 'fasl' | 'kitab';
|
|
147
|
+
};
|
|
148
|
+
type DictionaryProfileValidationIssueCode = 'invalid_version' | 'missing_zones' | 'duplicate_zone_name' | 'empty_zone_name' | 'empty_zone_families' | 'invalid_zone_page_range' | 'empty_heading_classes' | 'inert_heading_family' | 'empty_inline_prefixes' | 'invalid_gate_match' | 'invalid_gate_fuzzy' | 'duplicate_activate_after_gate' | 'invalid_stop_words' | 'invalid_previous_words' | 'invalid_previous_chars' | 'invalid_previous_word_scope' | 'invalid_authority_intro_precision' | 'invalid_continuation_precision';
|
|
149
|
+
type DictionaryProfileValidationIssue = {
|
|
150
|
+
code: DictionaryProfileValidationIssueCode;
|
|
151
|
+
message: string;
|
|
152
|
+
path: string;
|
|
153
|
+
zoneName?: string;
|
|
154
|
+
};
|
|
155
|
+
type HeadingFamily = {
|
|
156
|
+
use: 'heading';
|
|
157
|
+
classes: DictionaryHeadingClass[];
|
|
158
|
+
emit: DictionarySegmentKind;
|
|
159
|
+
allowNextLineColon?: boolean;
|
|
160
|
+
allowSingleLetter?: boolean;
|
|
161
|
+
};
|
|
162
|
+
type LineEntryFamily = {
|
|
163
|
+
use: 'lineEntry';
|
|
164
|
+
wrappers?: 'none' | 'parentheses' | 'brackets' | 'curly' | 'any';
|
|
165
|
+
allowWhitespaceBeforeColon?: boolean;
|
|
166
|
+
allowMultiWord?: boolean;
|
|
167
|
+
emit: 'entry';
|
|
168
|
+
};
|
|
169
|
+
type InlineSubentryFamily = {
|
|
170
|
+
use: 'inlineSubentry';
|
|
171
|
+
prefixes?: string[];
|
|
172
|
+
stripPrefixesFromLemma?: boolean;
|
|
173
|
+
emit: 'entry';
|
|
174
|
+
};
|
|
175
|
+
type CodeLineFamily = {
|
|
176
|
+
use: 'codeLine';
|
|
177
|
+
wrappers?: 'none' | 'paired' | 'mismatched' | 'either';
|
|
178
|
+
emit: 'marker';
|
|
179
|
+
};
|
|
180
|
+
type PairedFormsFamily = {
|
|
181
|
+
use: 'pairedForms';
|
|
182
|
+
separator?: 'comma' | 'space';
|
|
183
|
+
emit: 'marker' | 'entry';
|
|
184
|
+
requireStatusTail?: boolean;
|
|
185
|
+
};
|
|
186
|
+
type DictionaryFamily = HeadingFamily | LineEntryFamily | InlineSubentryFamily | CodeLineFamily | PairedFormsFamily;
|
|
187
|
+
type PageContinuationBlocker = {
|
|
188
|
+
use: 'pageContinuation';
|
|
189
|
+
appliesTo?: DictionaryFamily['use'][];
|
|
190
|
+
authorityPrecision?: 'high' | 'aggressive';
|
|
191
|
+
};
|
|
192
|
+
type IntroBlocker = {
|
|
193
|
+
use: 'intro';
|
|
194
|
+
appliesTo?: DictionaryFamily['use'][];
|
|
195
|
+
};
|
|
196
|
+
type AuthorityIntroBlocker = {
|
|
197
|
+
use: 'authorityIntro';
|
|
198
|
+
appliesTo?: DictionaryFamily['use'][];
|
|
199
|
+
precision?: 'high' | 'aggressive';
|
|
200
|
+
};
|
|
201
|
+
type StopLemmaBlocker = {
|
|
202
|
+
use: 'stopLemma';
|
|
203
|
+
appliesTo?: DictionaryFamily['use'][];
|
|
204
|
+
words: string[];
|
|
205
|
+
};
|
|
206
|
+
type PreviousWordBlocker = {
|
|
207
|
+
use: 'previousWord';
|
|
208
|
+
appliesTo?: DictionaryFamily['use'][];
|
|
209
|
+
words: string[];
|
|
210
|
+
scope?: 'samePage' | 'pageStart' | 'any';
|
|
211
|
+
};
|
|
212
|
+
type PreviousCharBlocker = {
|
|
213
|
+
use: 'previousChar';
|
|
214
|
+
appliesTo?: DictionaryFamily['use'][];
|
|
215
|
+
chars: string[];
|
|
216
|
+
};
|
|
217
|
+
type DictionaryBlocker = PageContinuationBlocker | IntroBlocker | AuthorityIntroBlocker | StopLemmaBlocker | PreviousWordBlocker | PreviousCharBlocker;
|
|
218
|
+
type DictionaryZone = {
|
|
219
|
+
name: string;
|
|
220
|
+
when?: {
|
|
221
|
+
minPageId?: number;
|
|
222
|
+
maxPageId?: number;
|
|
223
|
+
activateAfter?: DictionaryGate[];
|
|
224
|
+
};
|
|
225
|
+
families: DictionaryFamily[];
|
|
226
|
+
blockers?: DictionaryBlocker[];
|
|
227
|
+
};
|
|
228
|
+
type ArabicDictionaryProfile = {
|
|
229
|
+
version: 2;
|
|
230
|
+
zones: DictionaryZone[];
|
|
231
|
+
};
|
|
232
|
+
/** Sampled accepted or rejected candidate from dictionary-profile diagnostics. */
|
|
233
|
+
type DictionaryDiagnosticSample = {
|
|
234
|
+
accepted: boolean;
|
|
235
|
+
absoluteIndex: number;
|
|
236
|
+
family: DictionaryFamilyUse;
|
|
237
|
+
kind: DictionarySegmentKind;
|
|
238
|
+
lemma?: string;
|
|
239
|
+
line: number;
|
|
240
|
+
pageId: number;
|
|
241
|
+
reason?: DictionaryDiagnosticReason;
|
|
242
|
+
text: string;
|
|
243
|
+
zone: string;
|
|
244
|
+
};
|
|
245
|
+
/** Options for dictionary-profile diagnostics collection. */
|
|
246
|
+
type DictionaryProfileDiagnosticsOptions = {
|
|
247
|
+
sampleLimit?: number;
|
|
248
|
+
};
|
|
249
|
+
/** Aggregate diagnostics for tuning a dictionary profile. */
|
|
250
|
+
type DictionaryProfileDiagnostics = {
|
|
251
|
+
acceptedCount: number;
|
|
252
|
+
acceptedKinds: Record<DictionarySegmentKind, number>;
|
|
253
|
+
rejectionReasons: Record<DictionaryDiagnosticReason, number>;
|
|
254
|
+
familyCounts: Record<DictionaryFamilyUse, {
|
|
255
|
+
accepted: number;
|
|
256
|
+
rejected: number;
|
|
257
|
+
}>;
|
|
258
|
+
pageCount: number;
|
|
259
|
+
rejectedCount: number;
|
|
260
|
+
rejectedLemmas: Array<{
|
|
261
|
+
count: number;
|
|
262
|
+
lemma: string;
|
|
263
|
+
}>;
|
|
264
|
+
samples: DictionaryDiagnosticSample[];
|
|
265
|
+
zoneCounts: Record<string, {
|
|
266
|
+
accepted: number;
|
|
267
|
+
rejected: number;
|
|
268
|
+
}>;
|
|
269
|
+
};
|
|
270
|
+
//#endregion
|
|
125
271
|
//#region src/types/rules.d.ts
|
|
126
272
|
/**
|
|
127
273
|
* Literal regex pattern rule - no token expansion or auto-escaping is applied.
|
|
@@ -265,7 +411,7 @@ type LineEndsWithPattern = {
|
|
|
265
411
|
* This captures authoring intent in a serializable shape and is compiled into
|
|
266
412
|
* a regex internally by the rule compiler.
|
|
267
413
|
*/
|
|
268
|
-
interface DictionaryEntryPatternOptions {
|
|
414
|
+
type DictionaryEntryPatternOptions = {
|
|
269
415
|
/**
|
|
270
416
|
* Words that should never be treated as lemmas when followed by a colon.
|
|
271
417
|
*
|
|
@@ -310,7 +456,7 @@ interface DictionaryEntryPatternOptions {
|
|
|
310
456
|
* @default 10
|
|
311
457
|
*/
|
|
312
458
|
maxLetters?: number;
|
|
313
|
-
}
|
|
459
|
+
};
|
|
314
460
|
/**
|
|
315
461
|
* Arabic dictionary entry pattern rule - serializable headword matcher compiled internally.
|
|
316
462
|
*
|
|
@@ -632,18 +778,13 @@ type PreprocessTransform = 'removeZeroWidth' | 'condenseEllipsis' | 'fixTrailing
|
|
|
632
778
|
* error: (msg, ...args) => myLoggingService.error(msg, args),
|
|
633
779
|
* };
|
|
634
780
|
*/
|
|
635
|
-
interface Logger {
|
|
636
|
-
/** Log a debug message (verbose debugging output) */
|
|
637
|
-
debug?: (message: string, ...args: unknown[]) => void;
|
|
638
|
-
/** Log an error message (critical failures) */
|
|
639
|
-
error?: (message: string, ...args: unknown[]) => void;
|
|
640
|
-
/** Log an informational message (key progress points) */
|
|
641
|
-
info?: (message: string, ...args: unknown[]) => void;
|
|
642
|
-
/** Log a trace message (extremely verbose, per-iteration details) */
|
|
643
|
-
trace?: (message: string, ...args: unknown[]) => void;
|
|
644
|
-
/** Log a warning message (potential issues) */
|
|
781
|
+
type Logger = {
|
|
782
|
+
/** Log a debug message (verbose debugging output) */debug?: (message: string, ...args: unknown[]) => void; /** Log an error message (critical failures) */
|
|
783
|
+
error?: (message: string, ...args: unknown[]) => void; /** Log an informational message (key progress points) */
|
|
784
|
+
info?: (message: string, ...args: unknown[]) => void; /** Log a trace message (extremely verbose, per-iteration details) */
|
|
785
|
+
trace?: (message: string, ...args: unknown[]) => void; /** Log a warning message (potential issues) */
|
|
645
786
|
warn?: (message: string, ...args: unknown[]) => void;
|
|
646
|
-
}
|
|
787
|
+
};
|
|
647
788
|
/**
|
|
648
789
|
* Segmentation options controlling how pages are split.
|
|
649
790
|
*
|
|
@@ -677,6 +818,13 @@ interface Logger {
|
|
|
677
818
|
* };
|
|
678
819
|
*/
|
|
679
820
|
type SegmentationOptions = {
|
|
821
|
+
/**
|
|
822
|
+
* Dictionary profile for Shamela-style Arabic dictionaries.
|
|
823
|
+
*
|
|
824
|
+
* This authoring contract is compiled into internal matchers and merged
|
|
825
|
+
* with any regular `rules`.
|
|
826
|
+
*/
|
|
827
|
+
dictionary?: ArabicDictionaryProfile;
|
|
680
828
|
/**
|
|
681
829
|
* Rules applied in order to find split points.
|
|
682
830
|
*
|
|
@@ -1070,6 +1218,147 @@ type RepeatingSequencePattern = {
|
|
|
1070
1218
|
*/
|
|
1071
1219
|
declare const analyzeRepeatingSequences: (pages: Page[], options?: RepeatingSequenceOptions) => RepeatingSequencePattern[];
|
|
1072
1220
|
//#endregion
|
|
1221
|
+
//#region src/segmentation/pattern-validator.d.ts
|
|
1222
|
+
/**
|
|
1223
|
+
* Types of validation issues that can be detected.
|
|
1224
|
+
*/
|
|
1225
|
+
type ValidationIssueType = 'missing_braces' | 'unknown_token' | 'duplicate' | 'empty_pattern' | 'invalid_regex' | 'invalid_option';
|
|
1226
|
+
/**
|
|
1227
|
+
* A validation issue found in a pattern.
|
|
1228
|
+
*/
|
|
1229
|
+
type ValidationIssue = {
|
|
1230
|
+
type: ValidationIssueType;
|
|
1231
|
+
message: string;
|
|
1232
|
+
suggestion?: string; /** The token name involved in the issue (for unknown_token / missing_braces) */
|
|
1233
|
+
token?: string; /** The specific pattern involved (for duplicate) */
|
|
1234
|
+
pattern?: string;
|
|
1235
|
+
};
|
|
1236
|
+
/**
|
|
1237
|
+
* Validation result for a single rule, with issues keyed by pattern type.
|
|
1238
|
+
* Arrays parallel the input pattern arrays - undefined means no issue.
|
|
1239
|
+
*/
|
|
1240
|
+
type RuleValidationResult = {
|
|
1241
|
+
lineStartsWith?: (ValidationIssue | undefined)[];
|
|
1242
|
+
lineStartsAfter?: (ValidationIssue | undefined)[];
|
|
1243
|
+
lineEndsWith?: (ValidationIssue | undefined)[];
|
|
1244
|
+
template?: ValidationIssue;
|
|
1245
|
+
regex?: ValidationIssue;
|
|
1246
|
+
dictionaryEntry?: Partial<Record<keyof DictionaryEntryPatternOptions, ValidationIssue>>;
|
|
1247
|
+
};
|
|
1248
|
+
/**
|
|
1249
|
+
* Validates split rules for common pattern issues.
|
|
1250
|
+
*
|
|
1251
|
+
* Checks for:
|
|
1252
|
+
* - Missing `{{}}` around known token names (e.g., `raqms:num` instead of `{{raqms:num}}`)
|
|
1253
|
+
* - Unknown token names inside `{{}}` (e.g., `{{nonexistent}}`)
|
|
1254
|
+
* - Duplicate patterns within the same rule
|
|
1255
|
+
*
|
|
1256
|
+
* @param rules - Array of split rules to validate
|
|
1257
|
+
* @returns Array parallel to input with validation results (undefined if no issues)
|
|
1258
|
+
*
|
|
1259
|
+
* @example
|
|
1260
|
+
* const issues = validateRules([
|
|
1261
|
+
* { lineStartsAfter: ['raqms:num'] }, // Missing braces
|
|
1262
|
+
* { lineStartsWith: ['{{unknown}}'] }, // Unknown token
|
|
1263
|
+
* ]);
|
|
1264
|
+
* // issues[0]?.lineStartsAfter?.[0]?.type === 'missing_braces'
|
|
1265
|
+
* // issues[1]?.lineStartsWith?.[0]?.type === 'unknown_token'
|
|
1266
|
+
*/
|
|
1267
|
+
declare const validateRules: (rules: SplitRule[]) => (RuleValidationResult | undefined)[];
|
|
1268
|
+
/**
|
|
1269
|
+
* Formats a validation result array into a list of human-readable error messages.
|
|
1270
|
+
*
|
|
1271
|
+
* Useful for displaying validation errors in UIs.
|
|
1272
|
+
*
|
|
1273
|
+
* @param results - The result array from `validateRules()`
|
|
1274
|
+
* @returns Array of formatted error strings
|
|
1275
|
+
*
|
|
1276
|
+
* @example
|
|
1277
|
+
* const issues = validateRules(rules);
|
|
1278
|
+
* const errors = formatValidationReport(issues);
|
|
1279
|
+
* // ["Rule 1, lineStartsWith: Missing {{}} around token..."]
|
|
1280
|
+
*/
|
|
1281
|
+
declare const formatValidationReport: (results: (RuleValidationResult | undefined)[]) => string[];
|
|
1282
|
+
//#endregion
|
|
1283
|
+
//#region src/analysis/segmentation-advisor.d.ts
|
|
1284
|
+
type SegmentationAdvisorMode = 'structured' | 'continuous' | 'mixed';
|
|
1285
|
+
type SegmentationAdvisorOptions = {
|
|
1286
|
+
topLineStarts?: number;
|
|
1287
|
+
topRepeatingSequences?: number;
|
|
1288
|
+
minLineStartCount?: number;
|
|
1289
|
+
minRepeatingCount?: number;
|
|
1290
|
+
maxRules?: number;
|
|
1291
|
+
sampleSegments?: number;
|
|
1292
|
+
};
|
|
1293
|
+
type PreprocessDetections = {
|
|
1294
|
+
ellipsisCount: number;
|
|
1295
|
+
trailingWawCount: number;
|
|
1296
|
+
zeroWidthCount: number;
|
|
1297
|
+
};
|
|
1298
|
+
type PreprocessSuggestion = {
|
|
1299
|
+
count: number;
|
|
1300
|
+
reason: string;
|
|
1301
|
+
transform: PreprocessTransform;
|
|
1302
|
+
};
|
|
1303
|
+
type RuleSuggestionSource = 'line-start' | 'repeating-sequence';
|
|
1304
|
+
type RuleSuggestionConfidence = 'high' | 'medium' | 'low';
|
|
1305
|
+
type SuggestedRule = {
|
|
1306
|
+
confidence: RuleSuggestionConfidence;
|
|
1307
|
+
count: number;
|
|
1308
|
+
example: {
|
|
1309
|
+
pageId: number;
|
|
1310
|
+
text: string;
|
|
1311
|
+
};
|
|
1312
|
+
pattern: string;
|
|
1313
|
+
reason: string;
|
|
1314
|
+
rule: SplitRule;
|
|
1315
|
+
source: RuleSuggestionSource;
|
|
1316
|
+
};
|
|
1317
|
+
type BreakpointSuggestion = {
|
|
1318
|
+
breakpoints: Breakpoint[];
|
|
1319
|
+
maxPages: number;
|
|
1320
|
+
prefer: 'longer' | 'shorter';
|
|
1321
|
+
reason: string;
|
|
1322
|
+
};
|
|
1323
|
+
type SegmentationEvaluation = {
|
|
1324
|
+
averageSegmentLength: number;
|
|
1325
|
+
maxSegmentLength: number;
|
|
1326
|
+
multiPageSegments: number;
|
|
1327
|
+
segmentCount: number;
|
|
1328
|
+
validation: SegmentValidationReport;
|
|
1329
|
+
};
|
|
1330
|
+
type SegmentationSuggestionReport = {
|
|
1331
|
+
assessment: {
|
|
1332
|
+
mode: SegmentationAdvisorMode;
|
|
1333
|
+
reason: string;
|
|
1334
|
+
};
|
|
1335
|
+
breakpointSuggestions: BreakpointSuggestion[];
|
|
1336
|
+
evaluation?: SegmentationEvaluation;
|
|
1337
|
+
lineStarts: CommonLineStartPattern[];
|
|
1338
|
+
optimization: {
|
|
1339
|
+
mergedCount: number;
|
|
1340
|
+
optimizedRuleCount: number;
|
|
1341
|
+
originalRuleCount: number;
|
|
1342
|
+
};
|
|
1343
|
+
preprocess: {
|
|
1344
|
+
detections: PreprocessDetections;
|
|
1345
|
+
suggestions: PreprocessSuggestion[];
|
|
1346
|
+
};
|
|
1347
|
+
recommendedOptions: SegmentationOptions;
|
|
1348
|
+
repeatingSequences: RepeatingSequencePattern[];
|
|
1349
|
+
ruleSuggestions: SuggestedRule[];
|
|
1350
|
+
ruleValidation: RuleValidationResult[];
|
|
1351
|
+
ruleValidationErrors: string[];
|
|
1352
|
+
segmentSamples: Segment[];
|
|
1353
|
+
};
|
|
1354
|
+
/**
|
|
1355
|
+
* Generate a machine-readable draft segmentation report for AI agents.
|
|
1356
|
+
*
|
|
1357
|
+
* This helper is intentionally deterministic: it inspects pages, drafts
|
|
1358
|
+
* candidate rules, validates them, and evaluates its own recommendation.
|
|
1359
|
+
*/
|
|
1360
|
+
declare const suggestSegmentationOptions: (pages: Page[], options?: SegmentationAdvisorOptions) => SegmentationSuggestionReport;
|
|
1361
|
+
//#endregion
|
|
1073
1362
|
//#region src/detection.d.ts
|
|
1074
1363
|
/**
|
|
1075
1364
|
* Result of detecting a token pattern in text
|
|
@@ -1135,6 +1424,113 @@ declare const analyzeTextForRule: (text: string) => {
|
|
|
1135
1424
|
detected: DetectedPattern[];
|
|
1136
1425
|
} | null;
|
|
1137
1426
|
//#endregion
|
|
1427
|
+
//#region src/dictionary/arabic-dictionary-rule.d.ts
|
|
1428
|
+
interface ArabicDictionaryEntryRuleOptions extends DictionaryEntryPatternOptions {
|
|
1429
|
+
/**
|
|
1430
|
+
* Suppress page-start matches when the previous page's last Arabic word
|
|
1431
|
+
* is in this stoplist, unless that page ends with strong sentence punctuation.
|
|
1432
|
+
*/
|
|
1433
|
+
pageStartPrevWordStoplist?: string[];
|
|
1434
|
+
/**
|
|
1435
|
+
* Suppress non-page-start matches when the immediately previous Arabic word
|
|
1436
|
+
* on the same page is in this stoplist.
|
|
1437
|
+
*/
|
|
1438
|
+
samePagePrevWordStoplist?: string[];
|
|
1439
|
+
/**
|
|
1440
|
+
* Static metadata merged into matching segments.
|
|
1441
|
+
*/
|
|
1442
|
+
meta?: Record<string, unknown>;
|
|
1443
|
+
}
|
|
1444
|
+
/**
|
|
1445
|
+
* Creates a reusable split rule for Arabic dictionary entries.
|
|
1446
|
+
*
|
|
1447
|
+
* The returned rule preserves authoring intent as a serializable
|
|
1448
|
+
* `{ dictionaryEntry: ... }` pattern rather than eagerly compiling to a raw
|
|
1449
|
+
* regex string.
|
|
1450
|
+
*
|
|
1451
|
+
* @example
|
|
1452
|
+
* createArabicDictionaryEntryRule({
|
|
1453
|
+
* stopWords: ['وقيل', 'ويقال', 'قال'],
|
|
1454
|
+
* pageStartPrevWordStoplist: ['قال', 'وقيل', 'ويقال'],
|
|
1455
|
+
* })
|
|
1456
|
+
*
|
|
1457
|
+
* @example
|
|
1458
|
+
* createArabicDictionaryEntryRule({
|
|
1459
|
+
* allowParenthesized: true,
|
|
1460
|
+
* allowWhitespaceBeforeColon: true,
|
|
1461
|
+
* allowCommaSeparated: true,
|
|
1462
|
+
* stopWords: ['الليث', 'العجاج'],
|
|
1463
|
+
* })
|
|
1464
|
+
*/
|
|
1465
|
+
/**
|
|
1466
|
+
* @deprecated Prefer the top-level `SegmentationOptions.dictionary` profile for
|
|
1467
|
+
* whole-book dictionary segmentation. Keep this helper for advanced single-rule
|
|
1468
|
+
* composition inside a broader `SplitRule[]` pipeline.
|
|
1469
|
+
*/
|
|
1470
|
+
declare const createArabicDictionaryEntryRule: ({
|
|
1471
|
+
allowCommaSeparated,
|
|
1472
|
+
allowParenthesized,
|
|
1473
|
+
allowWhitespaceBeforeColon,
|
|
1474
|
+
captureName,
|
|
1475
|
+
maxLetters,
|
|
1476
|
+
meta,
|
|
1477
|
+
midLineSubentries,
|
|
1478
|
+
minLetters,
|
|
1479
|
+
pageStartPrevWordStoplist,
|
|
1480
|
+
samePagePrevWordStoplist,
|
|
1481
|
+
stopWords
|
|
1482
|
+
}: ArabicDictionaryEntryRuleOptions) => SplitRule;
|
|
1483
|
+
//#endregion
|
|
1484
|
+
//#region src/dictionary/heading-classifier.d.ts
|
|
1485
|
+
type DictionarySurfaceKind = DictionaryHeadingScanClass | 'lineEntry' | 'inlineSubentry' | 'codeLine' | 'pairedForms';
|
|
1486
|
+
type DictionarySurfaceMatch = {
|
|
1487
|
+
kind: DictionarySurfaceKind;
|
|
1488
|
+
pageId: number;
|
|
1489
|
+
text: string;
|
|
1490
|
+
lemma?: string;
|
|
1491
|
+
line: number;
|
|
1492
|
+
};
|
|
1493
|
+
type DictionaryMarkdownPage = {
|
|
1494
|
+
content: string;
|
|
1495
|
+
id: number;
|
|
1496
|
+
};
|
|
1497
|
+
type DictionarySurfaceReport = {
|
|
1498
|
+
counts: Record<DictionarySurfaceKind, number>;
|
|
1499
|
+
matches: DictionarySurfaceMatch[];
|
|
1500
|
+
};
|
|
1501
|
+
/**
|
|
1502
|
+
* Classifies a markdown heading line produced by `convertContentToMarkdown()`.
|
|
1503
|
+
*/
|
|
1504
|
+
declare const classifyDictionaryHeading: (line: string) => DictionaryHeadingScanClass;
|
|
1505
|
+
/**
|
|
1506
|
+
* Extracts dictionary surface matches from a markdown page.
|
|
1507
|
+
*/
|
|
1508
|
+
declare const scanDictionaryMarkdownPage: (page: DictionaryMarkdownPage) => DictionarySurfaceMatch[];
|
|
1509
|
+
/**
|
|
1510
|
+
* Aggregates dictionary surface counts across markdown pages.
|
|
1511
|
+
*/
|
|
1512
|
+
declare const analyzeDictionaryMarkdownPages: (pages: DictionaryMarkdownPage[]) => DictionarySurfaceReport;
|
|
1513
|
+
//#endregion
|
|
1514
|
+
//#region src/dictionary/profile.d.ts
|
|
1515
|
+
declare class DictionaryProfileValidationError extends Error {
|
|
1516
|
+
readonly issues: DictionaryProfileValidationIssue[];
|
|
1517
|
+
constructor(issues: DictionaryProfileValidationIssue[]);
|
|
1518
|
+
}
|
|
1519
|
+
/**
|
|
1520
|
+
* Validates a dictionary profile without normalizing it.
|
|
1521
|
+
*/
|
|
1522
|
+
declare const validateDictionaryProfile: (profile: ArabicDictionaryProfile) => DictionaryProfileValidationIssue[];
|
|
1523
|
+
//#endregion
|
|
1524
|
+
//#region src/dictionary/dictionary-diagnostics.d.ts
|
|
1525
|
+
/**
|
|
1526
|
+
* Collects tuning-oriented diagnostics for a dictionary profile without creating
|
|
1527
|
+
* segments. This output is intended for profile authoring workflows rather than
|
|
1528
|
+
* long-term compatibility guarantees.
|
|
1529
|
+
*
|
|
1530
|
+
* This is useful when tuning blockers and family choices for a new dictionary.
|
|
1531
|
+
*/
|
|
1532
|
+
declare const diagnoseDictionaryProfile: (pages: Page[], profile: ArabicDictionaryProfile, options?: DictionaryProfileDiagnosticsOptions) => DictionaryProfileDiagnostics;
|
|
1533
|
+
//#endregion
|
|
1138
1534
|
//#region src/optimization/optimize-rules.d.ts
|
|
1139
1535
|
/**
|
|
1140
1536
|
* Result from optimizing rules.
|
|
@@ -1189,58 +1585,6 @@ declare const fixTrailingWaw: (text: string) => string;
|
|
|
1189
1585
|
*/
|
|
1190
1586
|
declare const applyPreprocessToPage: (content: string, pageId: number, transforms: PreprocessTransform[]) => string;
|
|
1191
1587
|
//#endregion
|
|
1192
|
-
//#region src/segmentation/arabic-dictionary-rule.d.ts
|
|
1193
|
-
interface ArabicDictionaryEntryRuleOptions extends DictionaryEntryPatternOptions {
|
|
1194
|
-
/**
|
|
1195
|
-
* Suppress page-start matches when the previous page's last Arabic word
|
|
1196
|
-
* is in this stoplist, unless that page ends with strong sentence punctuation.
|
|
1197
|
-
*/
|
|
1198
|
-
pageStartPrevWordStoplist?: string[];
|
|
1199
|
-
/**
|
|
1200
|
-
* Suppress non-page-start matches when the immediately previous Arabic word
|
|
1201
|
-
* on the same page is in this stoplist.
|
|
1202
|
-
*/
|
|
1203
|
-
samePagePrevWordStoplist?: string[];
|
|
1204
|
-
/**
|
|
1205
|
-
* Static metadata merged into matching segments.
|
|
1206
|
-
*/
|
|
1207
|
-
meta?: Record<string, unknown>;
|
|
1208
|
-
}
|
|
1209
|
-
/**
|
|
1210
|
-
* Creates a reusable split rule for Arabic dictionary entries.
|
|
1211
|
-
*
|
|
1212
|
-
* The returned rule preserves authoring intent as a serializable
|
|
1213
|
-
* `{ dictionaryEntry: ... }` pattern rather than eagerly compiling to a raw
|
|
1214
|
-
* regex string.
|
|
1215
|
-
*
|
|
1216
|
-
* @example
|
|
1217
|
-
* createArabicDictionaryEntryRule({
|
|
1218
|
-
* stopWords: ['وقيل', 'ويقال', 'قال'],
|
|
1219
|
-
* pageStartPrevWordStoplist: ['قال', 'وقيل', 'ويقال'],
|
|
1220
|
-
* })
|
|
1221
|
-
*
|
|
1222
|
-
* @example
|
|
1223
|
-
* createArabicDictionaryEntryRule({
|
|
1224
|
-
* allowParenthesized: true,
|
|
1225
|
-
* allowWhitespaceBeforeColon: true,
|
|
1226
|
-
* allowCommaSeparated: true,
|
|
1227
|
-
* stopWords: ['الليث', 'العجاج'],
|
|
1228
|
-
* })
|
|
1229
|
-
*/
|
|
1230
|
-
declare const createArabicDictionaryEntryRule: ({
|
|
1231
|
-
allowCommaSeparated,
|
|
1232
|
-
allowParenthesized,
|
|
1233
|
-
allowWhitespaceBeforeColon,
|
|
1234
|
-
captureName,
|
|
1235
|
-
maxLetters,
|
|
1236
|
-
meta,
|
|
1237
|
-
midLineSubentries,
|
|
1238
|
-
minLetters,
|
|
1239
|
-
pageStartPrevWordStoplist,
|
|
1240
|
-
samePagePrevWordStoplist,
|
|
1241
|
-
stopWords
|
|
1242
|
-
}: ArabicDictionaryEntryRuleOptions) => SplitRule;
|
|
1243
|
-
//#endregion
|
|
1244
1588
|
//#region src/segmentation/breakpoint-utils.d.ts
|
|
1245
1589
|
/**
|
|
1246
1590
|
* Escapes regex metacharacters outside of `{{token}}` delimiters.
|
|
@@ -1292,68 +1636,6 @@ declare const getDebugReason: (meta: Record<string, any> | undefined, options?:
|
|
|
1292
1636
|
*/
|
|
1293
1637
|
declare const getSegmentDebugReason: (segment: Segment, options?: DebugReasonOptions) => string;
|
|
1294
1638
|
//#endregion
|
|
1295
|
-
//#region src/segmentation/pattern-validator.d.ts
|
|
1296
|
-
/**
|
|
1297
|
-
* Types of validation issues that can be detected.
|
|
1298
|
-
*/
|
|
1299
|
-
type ValidationIssueType = 'missing_braces' | 'unknown_token' | 'duplicate' | 'empty_pattern' | 'invalid_regex' | 'invalid_option';
|
|
1300
|
-
/**
|
|
1301
|
-
* A validation issue found in a pattern.
|
|
1302
|
-
*/
|
|
1303
|
-
type ValidationIssue = {
|
|
1304
|
-
type: ValidationIssueType;
|
|
1305
|
-
message: string;
|
|
1306
|
-
suggestion?: string; /** The token name involved in the issue (for unknown_token / missing_braces) */
|
|
1307
|
-
token?: string; /** The specific pattern involved (for duplicate) */
|
|
1308
|
-
pattern?: string;
|
|
1309
|
-
};
|
|
1310
|
-
/**
|
|
1311
|
-
* Validation result for a single rule, with issues keyed by pattern type.
|
|
1312
|
-
* Arrays parallel the input pattern arrays - undefined means no issue.
|
|
1313
|
-
*/
|
|
1314
|
-
type RuleValidationResult = {
|
|
1315
|
-
lineStartsWith?: (ValidationIssue | undefined)[];
|
|
1316
|
-
lineStartsAfter?: (ValidationIssue | undefined)[];
|
|
1317
|
-
lineEndsWith?: (ValidationIssue | undefined)[];
|
|
1318
|
-
template?: ValidationIssue;
|
|
1319
|
-
regex?: ValidationIssue;
|
|
1320
|
-
dictionaryEntry?: Partial<Record<keyof DictionaryEntryPatternOptions, ValidationIssue>>;
|
|
1321
|
-
};
|
|
1322
|
-
/**
|
|
1323
|
-
* Validates split rules for common pattern issues.
|
|
1324
|
-
*
|
|
1325
|
-
* Checks for:
|
|
1326
|
-
* - Missing `{{}}` around known token names (e.g., `raqms:num` instead of `{{raqms:num}}`)
|
|
1327
|
-
* - Unknown token names inside `{{}}` (e.g., `{{nonexistent}}`)
|
|
1328
|
-
* - Duplicate patterns within the same rule
|
|
1329
|
-
*
|
|
1330
|
-
* @param rules - Array of split rules to validate
|
|
1331
|
-
* @returns Array parallel to input with validation results (undefined if no issues)
|
|
1332
|
-
*
|
|
1333
|
-
* @example
|
|
1334
|
-
* const issues = validateRules([
|
|
1335
|
-
* { lineStartsAfter: ['raqms:num'] }, // Missing braces
|
|
1336
|
-
* { lineStartsWith: ['{{unknown}}'] }, // Unknown token
|
|
1337
|
-
* ]);
|
|
1338
|
-
* // issues[0]?.lineStartsAfter?.[0]?.type === 'missing_braces'
|
|
1339
|
-
* // issues[1]?.lineStartsWith?.[0]?.type === 'unknown_token'
|
|
1340
|
-
*/
|
|
1341
|
-
declare const validateRules: (rules: SplitRule[]) => (RuleValidationResult | undefined)[];
|
|
1342
|
-
/**
|
|
1343
|
-
* Formats a validation result array into a list of human-readable error messages.
|
|
1344
|
-
*
|
|
1345
|
-
* Useful for displaying validation errors in UIs.
|
|
1346
|
-
*
|
|
1347
|
-
* @param results - The result array from `validateRules()`
|
|
1348
|
-
* @returns Array of formatted error strings
|
|
1349
|
-
*
|
|
1350
|
-
* @example
|
|
1351
|
-
* const issues = validateRules(rules);
|
|
1352
|
-
* const errors = formatValidationReport(issues);
|
|
1353
|
-
* // ["Rule 1, lineStartsWith: Missing {{}} around token..."]
|
|
1354
|
-
*/
|
|
1355
|
-
declare const formatValidationReport: (results: (RuleValidationResult | undefined)[]) => string[];
|
|
1356
|
-
//#endregion
|
|
1357
1639
|
//#region src/segmentation/segmenter.d.ts
|
|
1358
1640
|
/**
|
|
1359
1641
|
* Segments pages of content based on pattern-matching rules.
|
|
@@ -1420,6 +1702,25 @@ declare const ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN = "[\u0621-\u063A\u0641-
|
|
|
1420
1702
|
* One or more Arabic letters, where each letter may carry combining marks.
|
|
1421
1703
|
*/
|
|
1422
1704
|
declare const ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN = "(?:[\u0621-\u063A\u0641-\u064A][\\u0610-\\u061A\\u0640\\u064B-\\u065F\\u0670\\u06D6-\\u06ED]*)+";
|
|
1705
|
+
declare const BASE_TOKENS: {
|
|
1706
|
+
/** Chapter marker (باب). */readonly bab: "باب"; /** Basmala (بسم الله). Also matches ﷽. */
|
|
1707
|
+
readonly basmalah: string; /** Bullet point variants: `•`, `*`, `°`. */
|
|
1708
|
+
readonly bullet: "[•*°]"; /** Dash variants: `-` (U+002D), `–` (U+2013), `—` (U+2014), `ـ` (tatweel U+0640). */
|
|
1709
|
+
readonly dash: "[-–—ـ]"; /** Section marker (فصل / مسألة). */
|
|
1710
|
+
readonly fasl: string; /** Single Arabic letter (أ-ي). Does NOT include diacritics. */
|
|
1711
|
+
readonly harf: "[أ-ي]"; /** One or more single Arabic letters separated by spaces, allowing marks/tatweel on each isolated letter (e.g. `د ت س`, `هـ ث`). For multi-letter codes use `{{rumuz}}`. */
|
|
1712
|
+
readonly harfs: "[أ-غف-ي][\\u0610-\\u061A\\u0640\\u064B-\\u065F\\u0670\\u06D6-\\u06ED]*(?:\\s+[أ-غف-ي][\\u0610-\\u061A\\u0640\\u064B-\\u065F\\u0670\\u06D6-\\u06ED]*)*"; /** Horizontal rule / separator: 5+ repeated dashes, underscores, equals, or tatweels. Mixed allowed. */
|
|
1713
|
+
readonly hr: "[-–—ـ_=]{5,}"; /** Book marker (كتاب). */
|
|
1714
|
+
readonly kitab: "كتاب"; /** Hadith transmission phrases (حدثنا, أخبرنا, حدثني, etc.). */
|
|
1715
|
+
readonly naql: string; /** Newline character. Useful for breakpoints that split on line boundaries. */
|
|
1716
|
+
readonly newline: "\\n"; /** Single ASCII digit (0-9). */
|
|
1717
|
+
readonly num: "\\d"; /** One or more ASCII digits (0-9)+. */
|
|
1718
|
+
readonly nums: "\\d+"; /** Single Arabic-Indic digit (٠-٩, U+0660-U+0669). */
|
|
1719
|
+
readonly raqm: "[\\u0660-\\u0669]"; /** One or more Arabic-Indic digits (٠-٩)+. */
|
|
1720
|
+
readonly raqms: "[\\u0660-\\u0669]+"; /** Rijāl/takhrīj source abbreviations. Matches one or more codes separated by whitespace. */
|
|
1721
|
+
readonly rumuz: string; /** Arabic/common punctuation: `.`, `!`, `?`, `؟`, `؛`. */
|
|
1722
|
+
readonly tarqim: "[.!?؟؛]";
|
|
1723
|
+
};
|
|
1423
1724
|
/** Pre-defined token constants for use in patterns. */
|
|
1424
1725
|
declare const Token: {
|
|
1425
1726
|
/** Chapter marker - باب */readonly BAB: "{{bab}}"; /** Basmala - بسم الله */
|
|
@@ -1445,12 +1746,18 @@ declare const Token: {
|
|
|
1445
1746
|
* Type representing valid token constant keys.
|
|
1446
1747
|
*/
|
|
1447
1748
|
type TokenKey = keyof typeof Token;
|
|
1749
|
+
/** Wraps a token constant with a named capture: `{{token}}` → `{{token:name}}`. */
|
|
1750
|
+
declare const withCapture: (token: string, name: string) => string;
|
|
1751
|
+
/** Composite tokens that reference base tokens. Pre-expanded at load time. @internal */
|
|
1752
|
+
declare const COMPOSITE_TOKENS: {
|
|
1753
|
+
/** Common hadith numbering format: Arabic-Indic digits + dash + space. */readonly numbered: "{{raqms}} {{dash}} ";
|
|
1754
|
+
};
|
|
1755
|
+
type BaseTokenName = keyof typeof BASE_TOKENS;
|
|
1756
|
+
type CompositeTokenName = keyof typeof COMPOSITE_TOKENS;
|
|
1448
1757
|
/**
|
|
1449
1758
|
* Type representing valid token pattern names for `getTokenPattern()`.
|
|
1450
1759
|
*/
|
|
1451
|
-
type TokenPatternName =
|
|
1452
|
-
/** Wraps a token constant with a named capture: `{{token}}` → `{{token:name}}`. */
|
|
1453
|
-
declare const withCapture: (token: string, name: string) => string;
|
|
1760
|
+
type TokenPatternName = BaseTokenName | CompositeTokenName;
|
|
1454
1761
|
/** Expands composite tokens (e.g. `{{numbered}}`) to their underlying template form. */
|
|
1455
1762
|
declare const expandCompositeTokensInTemplate: (template: string) => string;
|
|
1456
1763
|
/**
|
|
@@ -1479,7 +1786,8 @@ declare const expandCompositeTokensInTemplate: (template: string) => string;
|
|
|
1479
1786
|
* { lineStartsAfter: ['{{numbered}}'], split: 'at' }
|
|
1480
1787
|
*/
|
|
1481
1788
|
declare const TOKEN_PATTERNS: {
|
|
1482
|
-
|
|
1789
|
+
readonly numbered: string; /** Chapter marker (باب). */
|
|
1790
|
+
readonly bab: "باب"; /** Basmala (بسم الله). Also matches ﷽. */
|
|
1483
1791
|
readonly basmalah: string; /** Bullet point variants: `•`, `*`, `°`. */
|
|
1484
1792
|
readonly bullet: "[•*°]"; /** Dash variants: `-` (U+002D), `–` (U+2013), `—` (U+2014), `ـ` (tatweel U+0640). */
|
|
1485
1793
|
readonly dash: "[-–—ـ]"; /** Section marker (فصل / مسألة). */
|
|
@@ -1784,5 +2092,5 @@ type ValidationOptions = {
|
|
|
1784
2092
|
*/
|
|
1785
2093
|
declare const validateSegments: (pages: Page[], options: SegmentationOptions, segments: Segment[], validationOptions?: ValidationOptions) => SegmentValidationReport;
|
|
1786
2094
|
//#endregion
|
|
1787
|
-
export { ARABIC_BASE_LETTER_CLASS, ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN, ARABIC_MARKS_CLASS, ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN, type ArabicDictionaryEntryRuleOptions, type Breakpoint, type BreakpointRule, type CommonLineStartPattern, type CondenseEllipsisRule, type DetectedPattern, type DictionaryEntryPatternOptions, type ExpandResult, type FixTrailingWawRule, type LineStartAnalysisOptions, type LineStartPatternExample, type Logger, type OptimizeResult, PATTERN_TYPE_KEYS, type Page, type PageRange, type PageRangeConstraint, type PageRangeConstraintWithExclude, type PatternProcessor, type PatternTypeKey, type PreprocessTransform, type RemoveZeroWidthRule, type RepeatingSequenceExample, type RepeatingSequenceOptions, type RepeatingSequencePattern, type RuleValidationResult, type Segment, type SegmentValidationIssue, type SegmentValidationIssueSeverity, type SegmentValidationIssueType, type SegmentValidationReport, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, Token, type TokenKey, type TokenMapping, type TokenPatternName, type ValidationIssue, type ValidationIssueType, type ValidationOptions, analyzeCommonLineStarts, analyzeRepeatingSequences, analyzeTextForRule, applyPreprocessToPage, applyTokenMappings, condenseEllipsis, containsTokens, createArabicDictionaryEntryRule, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, escapeWordsOutsideTokens, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, fixTrailingWaw, formatValidationReport, generateTemplateFromText, getAvailableTokens, getDebugReason, getSegmentDebugReason, getTokenPattern, makeDiacriticInsensitive, normalizeArabicForComparison, optimizeRules, removeZeroWidth, segmentPages, shouldDefaultToFuzzy, stripTokenMappings, suggestPatternConfig, templateToRegex, validateRules, validateSegments, withCapture };
|
|
2095
|
+
export { ARABIC_BASE_LETTER_CLASS, ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN, ARABIC_MARKS_CLASS, ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN, type ArabicDictionaryEntryRuleOptions, type ArabicDictionaryProfile, type Breakpoint, type BreakpointRule, type BreakpointSuggestion, type CommonLineStartPattern, type CondenseEllipsisRule, type DetectedPattern, type DictionaryBlocker, type DictionaryDiagnosticReason, type DictionaryDiagnosticSample, type DictionaryEntryPatternOptions, type DictionaryFamily, type DictionaryFamilyUse, type DictionaryGate, type DictionaryHeadingClass, type DictionaryHeadingScanClass, type DictionaryMarkdownPage, type DictionaryProfileDiagnostics, type DictionaryProfileDiagnosticsOptions, DictionaryProfileValidationError, type DictionaryProfileValidationIssue, type DictionaryProfileValidationIssueCode, type DictionarySegmentKind, type DictionarySegmentMeta, type DictionarySurfaceKind, type DictionarySurfaceMatch, type DictionarySurfaceReport, type DictionaryZone, type ExpandResult, type FixTrailingWawRule, type LineStartAnalysisOptions, type LineStartPatternExample, type Logger, type OptimizeResult, PATTERN_TYPE_KEYS, type Page, type PageRange, type PageRangeConstraint, type PageRangeConstraintWithExclude, type PatternProcessor, type PatternTypeKey, type PreprocessDetections, type PreprocessSuggestion, type PreprocessTransform, type RemoveZeroWidthRule, type RepeatingSequenceExample, type RepeatingSequenceOptions, type RepeatingSequencePattern, type RuleSuggestionConfidence, type RuleSuggestionSource, type RuleValidationResult, type Segment, type SegmentValidationIssue, type SegmentValidationIssueSeverity, type SegmentValidationIssueType, type SegmentValidationReport, type SegmentationAdvisorMode, type SegmentationAdvisorOptions, type SegmentationEvaluation, type SegmentationOptions, type SegmentationSuggestionReport, type SplitRule, type SuggestedRule, TOKEN_PATTERNS, Token, type TokenKey, type TokenMapping, type TokenPatternName, type 
ValidationIssue, type ValidationIssueType, type ValidationOptions, analyzeCommonLineStarts, analyzeDictionaryMarkdownPages, analyzeRepeatingSequences, analyzeTextForRule, applyPreprocessToPage, applyTokenMappings, classifyDictionaryHeading, condenseEllipsis, containsTokens, createArabicDictionaryEntryRule, detectTokenPatterns, diagnoseDictionaryProfile, escapeRegex, escapeTemplateBrackets, escapeWordsOutsideTokens, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, fixTrailingWaw, formatValidationReport, generateTemplateFromText, getAvailableTokens, getDebugReason, getSegmentDebugReason, getTokenPattern, makeDiacriticInsensitive, normalizeArabicForComparison, optimizeRules, removeZeroWidth, scanDictionaryMarkdownPage, segmentPages, shouldDefaultToFuzzy, stripTokenMappings, suggestPatternConfig, suggestSegmentationOptions, templateToRegex, validateDictionaryProfile, validateRules, validateSegments, withCapture };
|
|
1788
2096
|
//# sourceMappingURL=index.d.mts.map
|