cmpstr 3.0.1 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/dist/CmpStr.esm.js +242 -24
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +242 -24
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +2 -1
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +1 -1
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -2
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -1
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -1
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +1 -1
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +5 -5
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -1
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +199 -0
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -0
- package/dist/cjs/phonetic/Cologne.cjs +1 -1
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +50 -16
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -1
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/utils/DeepMerge.cjs +2 -2
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +10 -10
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -1
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -1
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +1 -1
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +1 -1
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +3 -3
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +7 -7
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +2 -2
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/{CmpStr.js → CmpStr.mjs} +26 -25
- package/dist/esm/CmpStr.mjs.map +1 -0
- package/dist/esm/{CmpStrAsync.js → CmpStrAsync.mjs} +6 -6
- package/dist/esm/CmpStrAsync.mjs.map +1 -0
- package/dist/esm/index.mjs +7 -0
- package/dist/esm/index.mjs.map +1 -0
- package/dist/esm/metric/{Cosine.js → Cosine.mjs} +4 -4
- package/dist/esm/metric/Cosine.mjs.map +1 -0
- package/dist/esm/metric/{DamerauLevenshtein.js → DamerauLevenshtein.mjs} +4 -4
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -0
- package/dist/esm/metric/{DiceSorensen.js → DiceSorensen.mjs} +4 -4
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -0
- package/dist/esm/metric/{Hamming.js → Hamming.mjs} +4 -4
- package/dist/esm/metric/Hamming.mjs.map +1 -0
- package/dist/esm/metric/{Jaccard.js → Jaccard.mjs} +4 -4
- package/dist/esm/metric/Jaccard.mjs.map +1 -0
- package/dist/esm/metric/{JaroWinkler.js → JaroWinkler.mjs} +4 -4
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -0
- package/dist/esm/metric/{LCS.js → LCS.mjs} +4 -4
- package/dist/esm/metric/LCS.mjs.map +1 -0
- package/dist/esm/metric/{Levenshtein.js → Levenshtein.mjs} +4 -4
- package/dist/esm/metric/Levenshtein.mjs.map +1 -0
- package/dist/esm/metric/{Metric.js → Metric.mjs} +9 -9
- package/dist/esm/metric/Metric.mjs.map +1 -0
- package/dist/esm/metric/{NeedlemanWunsch.js → NeedlemanWunsch.mjs} +4 -4
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -0
- package/dist/esm/metric/{SmithWaterman.js → SmithWaterman.mjs} +4 -4
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -0
- package/dist/esm/metric/{qGram.js → qGram.mjs} +4 -4
- package/dist/esm/metric/qGram.mjs.map +1 -0
- package/dist/esm/phonetic/Caverphone.mjs +201 -0
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -0
- package/dist/esm/phonetic/{Cologne.js → Cologne.mjs} +3 -3
- package/dist/esm/phonetic/Cologne.mjs.map +1 -0
- package/dist/esm/phonetic/{Metaphone.js → Metaphone.mjs} +3 -3
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -0
- package/dist/esm/phonetic/{Phonetic.js → Phonetic.mjs} +52 -21
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -0
- package/dist/esm/phonetic/{Soundex.js → Soundex.mjs} +3 -3
- package/dist/esm/phonetic/Soundex.mjs.map +1 -0
- package/dist/esm/utils/{DeepMerge.js → DeepMerge.mjs} +3 -3
- package/dist/esm/utils/DeepMerge.mjs.map +1 -0
- package/dist/esm/utils/{DiffChecker.js → DiffChecker.mjs} +11 -11
- package/dist/esm/utils/DiffChecker.mjs.map +1 -0
- package/dist/esm/utils/{Filter.js → Filter.mjs} +2 -2
- package/dist/esm/utils/Filter.mjs.map +1 -0
- package/dist/esm/utils/{HashTable.js → HashTable.mjs} +2 -2
- package/dist/esm/utils/HashTable.mjs.map +1 -0
- package/dist/esm/utils/{Normalizer.js → Normalizer.mjs} +3 -3
- package/dist/esm/utils/Normalizer.mjs.map +1 -0
- package/dist/esm/utils/{Pool.js → Pool.mjs} +2 -2
- package/dist/esm/utils/Pool.mjs.map +1 -0
- package/dist/esm/utils/{Profiler.js → Profiler.mjs} +4 -4
- package/dist/esm/utils/Profiler.mjs.map +1 -0
- package/dist/esm/utils/{Registry.js → Registry.mjs} +8 -8
- package/dist/esm/utils/Registry.mjs.map +1 -0
- package/dist/esm/utils/{TextAnalyzer.js → TextAnalyzer.mjs} +3 -3
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -0
- package/dist/types/index.d.ts +2 -2
- package/dist/types/phonetic/Caverphone.d.ts +55 -0
- package/dist/types/phonetic/Phonetic.d.ts +14 -2
- package/dist/types/phonetic/index.d.ts +1 -0
- package/dist/types/utils/Types.d.ts +11 -0
- package/package.json +14 -12
- package/dist/esm/CmpStr.js.map +0 -1
- package/dist/esm/CmpStrAsync.js.map +0 -1
- package/dist/esm/index.js +0 -7
- package/dist/esm/index.js.map +0 -1
- package/dist/esm/metric/Cosine.js.map +0 -1
- package/dist/esm/metric/DamerauLevenshtein.js.map +0 -1
- package/dist/esm/metric/DiceSorensen.js.map +0 -1
- package/dist/esm/metric/Hamming.js.map +0 -1
- package/dist/esm/metric/Jaccard.js.map +0 -1
- package/dist/esm/metric/JaroWinkler.js.map +0 -1
- package/dist/esm/metric/LCS.js.map +0 -1
- package/dist/esm/metric/Levenshtein.js.map +0 -1
- package/dist/esm/metric/Metric.js.map +0 -1
- package/dist/esm/metric/NeedlemanWunsch.js.map +0 -1
- package/dist/esm/metric/SmithWaterman.js.map +0 -1
- package/dist/esm/metric/qGram.js.map +0 -1
- package/dist/esm/phonetic/Cologne.js.map +0 -1
- package/dist/esm/phonetic/Metaphone.js.map +0 -1
- package/dist/esm/phonetic/Phonetic.js.map +0 -1
- package/dist/esm/phonetic/Soundex.js.map +0 -1
- package/dist/esm/utils/DeepMerge.js.map +0 -1
- package/dist/esm/utils/DiffChecker.js.map +0 -1
- package/dist/esm/utils/Filter.js.map +0 -1
- package/dist/esm/utils/HashTable.js.map +0 -1
- package/dist/esm/utils/Normalizer.js.map +0 -1
- package/dist/esm/utils/Pool.js.map +0 -1
- package/dist/esm/utils/Profiler.js.map +0 -1
- package/dist/esm/utils/Registry.js.map +0 -1
- package/dist/esm/utils/TextAnalyzer.js.map +0 -1
package/README.md
CHANGED
|
@@ -68,8 +68,14 @@ console.log( result );
|
|
|
68
68
|
// [ 'Meyer', 'Meier' ]
|
|
69
69
|
```
|
|
70
70
|
|
|
71
|
+
_Try with [OneCompiler](https://onecompiler.com/nodejs/43qr6trny)._
|
|
72
|
+
|
|
73
|
+
## CLI Tool
|
|
74
|
+
|
|
75
|
+
Try out or use CmpStr on the terminal. Install the **[cmpstr-cli](https://npmjs.com/package/cmpstr-cli)** package and use many features of CmpStr directly on the console via the cmpstr command. Many options and parameters also make the command suitable for scripts and automatic processing.
|
|
76
|
+
|
|
71
77
|
## Documentation
|
|
72
78
|
|
|
73
79
|
The full documentation, API reference and advanced usage examples are available in the [GitHub Wiki](https://github.com/komed3/cmpstr/wiki).
|
|
74
80
|
|
|
75
|
-
**LICENSE MIT © 2023-2025 PAUL KÖHLER (KOMED3)**
|
|
81
|
+
**LICENSE MIT © 2023-2025 PAUL KÖHLER (KOMED3)**
|
package/dist/CmpStr.esm.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* CmpStr v3.0.
|
|
2
|
+
* CmpStr v3.0.2 build-522ae69-250720
|
|
3
3
|
* This is a lightweight, fast and well performing library for calculating string similarity.
|
|
4
4
|
* (c) 2023-2025 Paul Köhler @komed3 / MIT License
|
|
5
5
|
* Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
|
|
@@ -61,7 +61,7 @@ function set(t, path, value) {
|
|
|
61
61
|
const [k, ...r] = parse(path);
|
|
62
62
|
// Throw an error if the target is defined but is not an object
|
|
63
63
|
if (t !== undefined && (typeof t !== 'object' || t === null))
|
|
64
|
-
throw Error(`
|
|
64
|
+
throw Error(`Cannot set property <${k}> of <${JSON.stringify(t)}>`);
|
|
65
65
|
// Assign the value to the specified key in the object
|
|
66
66
|
return Object.assign(t ?? (typeof k === 'number' ? [] : Object.create(null)), {
|
|
67
67
|
[k]: set(t?.[k], r.join('.'), value)
|
|
@@ -1680,7 +1680,7 @@ const factory = Object.create(null);
|
|
|
1680
1680
|
function Registry(reg, ctor) {
|
|
1681
1681
|
// Throws an error if the registry already exists
|
|
1682
1682
|
if (reg in registry || reg in factory)
|
|
1683
|
-
throw new Error(`
|
|
1683
|
+
throw new Error(`Registry <${reg}> already exists / overwriting is forbidden`);
|
|
1684
1684
|
// Create a registry object to hold class constructors
|
|
1685
1685
|
const classes = Object.create(null);
|
|
1686
1686
|
const service = {
|
|
@@ -1695,9 +1695,9 @@ function Registry(reg, ctor) {
|
|
|
1695
1695
|
*/
|
|
1696
1696
|
add(name, cls, update = false) {
|
|
1697
1697
|
if (!(cls.prototype instanceof ctor))
|
|
1698
|
-
throw new TypeError(`
|
|
1698
|
+
throw new TypeError(`Class must extend <${reg}>`);
|
|
1699
1699
|
if (!update && name in classes)
|
|
1700
|
-
throw new Error(`
|
|
1700
|
+
throw new Error(`Entry <${name}> already exists / use <update=true> to overwrite`);
|
|
1701
1701
|
classes[name] = cls;
|
|
1702
1702
|
},
|
|
1703
1703
|
/**
|
|
@@ -1728,7 +1728,7 @@ function Registry(reg, ctor) {
|
|
|
1728
1728
|
*/
|
|
1729
1729
|
get(name) {
|
|
1730
1730
|
if (!(name in classes))
|
|
1731
|
-
throw new Error(`
|
|
1731
|
+
throw new Error(`Class <${name}> not registered for <${reg}>`);
|
|
1732
1732
|
return classes[name];
|
|
1733
1733
|
}
|
|
1734
1734
|
};
|
|
@@ -1749,7 +1749,7 @@ function Registry(reg, ctor) {
|
|
|
1749
1749
|
*/
|
|
1750
1750
|
function resolveCls(reg, cls) {
|
|
1751
1751
|
if (!(reg in registry))
|
|
1752
|
-
throw new ReferenceError(`
|
|
1752
|
+
throw new ReferenceError(`Registry <${reg}> does not exist`);
|
|
1753
1753
|
return (typeof cls === 'string' ? registry[reg]?.get(cls) : cls);
|
|
1754
1754
|
}
|
|
1755
1755
|
/**
|
|
@@ -1767,7 +1767,7 @@ function createFromRegistry(reg, cls, ...args) {
|
|
|
1767
1767
|
return new cls(...args);
|
|
1768
1768
|
}
|
|
1769
1769
|
catch (err) {
|
|
1770
|
-
throw new Error(`
|
|
1770
|
+
throw new Error(`Cannot instantiate class <${cls}>`);
|
|
1771
1771
|
}
|
|
1772
1772
|
}
|
|
1773
1773
|
|
|
@@ -1902,7 +1902,7 @@ class Metric {
|
|
|
1902
1902
|
* @throws {Error} - If not overridden in a subclass
|
|
1903
1903
|
*/
|
|
1904
1904
|
compute(a, b, m, n, maxLen) {
|
|
1905
|
-
throw new Error(`
|
|
1905
|
+
throw new Error(`Method compute() must be overridden in a subclass`);
|
|
1906
1906
|
}
|
|
1907
1907
|
/**
|
|
1908
1908
|
* Run the metric computation for single inputs (two strings).
|
|
@@ -2059,7 +2059,7 @@ class Metric {
|
|
|
2059
2059
|
*/
|
|
2060
2060
|
isPairwise(safe = false) {
|
|
2061
2061
|
return this.isBatch() && this.a.length === this.b.length ? true : !safe && (() => {
|
|
2062
|
-
throw new Error(`
|
|
2062
|
+
throw new Error(`Mode <pairwise> requires arrays of equal length`);
|
|
2063
2063
|
})();
|
|
2064
2064
|
}
|
|
2065
2065
|
/**
|
|
@@ -2120,7 +2120,7 @@ class Metric {
|
|
|
2120
2120
|
this.runPairwise();
|
|
2121
2121
|
break;
|
|
2122
2122
|
// Unsupported mode
|
|
2123
|
-
default: throw new Error(`
|
|
2123
|
+
default: throw new Error(`Unsupported mode <${mode}>`);
|
|
2124
2124
|
}
|
|
2125
2125
|
}
|
|
2126
2126
|
/**
|
|
@@ -2155,7 +2155,7 @@ class Metric {
|
|
|
2155
2155
|
await this.runPairwiseAsync();
|
|
2156
2156
|
break;
|
|
2157
2157
|
// Unsupported mode
|
|
2158
|
-
default: throw new Error(`
|
|
2158
|
+
default: throw new Error(`Unsupported async mode <${mode}>`);
|
|
2159
2159
|
}
|
|
2160
2160
|
}
|
|
2161
2161
|
/**
|
|
@@ -2698,7 +2698,7 @@ class HammingDistance extends Metric {
|
|
|
2698
2698
|
}
|
|
2699
2699
|
// Standard: Error for unequal length
|
|
2700
2700
|
else
|
|
2701
|
-
throw new Error(`
|
|
2701
|
+
throw new Error(`Strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
|
|
2702
2702
|
`use option.pad for automatic adjustment`);
|
|
2703
2703
|
}
|
|
2704
2704
|
// Calculate the Hamming distance
|
|
@@ -3347,8 +3347,8 @@ MetricRegistry.add('smithWaterman', SmithWatermanDistance);
|
|
|
3347
3347
|
* pose a risk of infringing upon existing trademarks due to their pronunciation.
|
|
3348
3348
|
*
|
|
3349
3349
|
* This module provides an abstract class for generating phonetic indices based
|
|
3350
|
-
* on mappings and rules. It allows for the implementation of various
|
|
3351
|
-
* algorithms by extending the abstract class.
|
|
3350
|
+
* on mappings, patterns and rules. It allows for the implementation of various
|
|
3351
|
+
* phonetic algorithms by extending the abstract class.
|
|
3352
3352
|
*
|
|
3353
3353
|
* @module Phonetic
|
|
3354
3354
|
* @author Paul Köhler (komed3)
|
|
@@ -3388,22 +3388,55 @@ class Phonetic {
|
|
|
3388
3388
|
* Constructor for the Phonetic class.
|
|
3389
3389
|
*
|
|
3390
3390
|
* Initializes the phonetic algorithm with the specified options and mapping.
|
|
3391
|
+
* Options hierarchy: User input > mapping options > default
|
|
3391
3392
|
*
|
|
3392
3393
|
* @param {string} algo - The name of the algorithm (e.g. 'soundex')
|
|
3393
3394
|
* @param {PhoneticOptions} [opt] - Options for the phonetic algorithm
|
|
3394
3395
|
* @throws {Error} - If no mapping is specified or the requested mapping is not declared
|
|
3395
3396
|
*/
|
|
3396
3397
|
constructor(algo, opt = {}) {
|
|
3397
|
-
//
|
|
3398
|
-
|
|
3399
|
-
//
|
|
3400
|
-
const
|
|
3398
|
+
// Get the phonetic default options
|
|
3399
|
+
const defaults = this.constructor.default ?? {};
|
|
3400
|
+
// Determine phonetic map ID from options or use defaults
|
|
3401
|
+
const mapId = opt.map ?? defaults.map;
|
|
3402
|
+
// If no mapping is specified, throw an error
|
|
3403
|
+
if (!mapId)
|
|
3404
|
+
throw new Error(`No mapping specified for phonetic algorithm`);
|
|
3405
|
+
// Get the mapping based on the determined map ID
|
|
3406
|
+
const map = PhoneticMappingRegistry.get(algo, mapId);
|
|
3401
3407
|
// If the mapping is not defined, throw an error
|
|
3402
3408
|
if (map === undefined)
|
|
3403
|
-
throw new Error(`
|
|
3409
|
+
throw new Error(`Requested mapping <${mapId}> is not declared`);
|
|
3410
|
+
// Set the options by merging the defaults, the mapping options and the provided ones
|
|
3411
|
+
this.options = merge(merge(defaults, map.options ?? {}), opt);
|
|
3412
|
+
// Set the algorithm name and mapping
|
|
3404
3413
|
this.algo = algo;
|
|
3405
3414
|
this.map = map;
|
|
3406
3415
|
}
|
|
3416
|
+
/**
|
|
3417
|
+
* Applies patterns to a word based on the phonetic map.
|
|
3418
|
+
*
|
|
3419
|
+
* This method processes the word by applying all defined patterns in the
|
|
3420
|
+
* phonetic map. It replaces occurrences of specified patterns with their
|
|
3421
|
+
* corresponding replacements.
|
|
3422
|
+
*
|
|
3423
|
+
* @param {string} word - The input word to be processed
|
|
3424
|
+
* @returns {string} - The modified word after applying all patterns
|
|
3425
|
+
*/
|
|
3426
|
+
applyPattern(word) {
|
|
3427
|
+
const { patterns = [] } = this.map;
|
|
3428
|
+
// If no patterns are provided, return the input
|
|
3429
|
+
if (!patterns || !patterns.length)
|
|
3430
|
+
return word;
|
|
3431
|
+
// Iterate over the patterns and apply each replacement in order
|
|
3432
|
+
for (const { pattern, replace, all = false } of patterns) {
|
|
3433
|
+
// Search for the pattern in the word and replace it
|
|
3434
|
+
// Use replaceAll if 'all' is true, otherwise use replace
|
|
3435
|
+
word = word[all ? 'replaceAll' : 'replace'](pattern, replace);
|
|
3436
|
+
}
|
|
3437
|
+
// Return the modified word after applying all patterns
|
|
3438
|
+
return word;
|
|
3439
|
+
}
|
|
3407
3440
|
/**
|
|
3408
3441
|
* Applies phonetic rules to a character in a word context.
|
|
3409
3442
|
*
|
|
@@ -3481,6 +3514,9 @@ class Phonetic {
|
|
|
3481
3514
|
*/
|
|
3482
3515
|
encode(word) {
|
|
3483
3516
|
const { map = {}, ignore = [] } = this.map;
|
|
3517
|
+
// Apply patterns to the word before processing
|
|
3518
|
+
// This allows for pre-processing of the word based on defined patterns
|
|
3519
|
+
word = this.applyPattern(word);
|
|
3484
3520
|
// Get the characters of the word and its length
|
|
3485
3521
|
const chars = this.word2Chars(word);
|
|
3486
3522
|
const charLen = chars.length;
|
|
@@ -3517,11 +3553,11 @@ class Phonetic {
|
|
|
3517
3553
|
* @returns {string|undefined} - The phonetic code or undefined if no code applies
|
|
3518
3554
|
*/
|
|
3519
3555
|
mapChar(char, i, chars, charLen, lastCode, map) {
|
|
3520
|
-
const { dedupe = true } = this.options;
|
|
3556
|
+
const { dedupe = true, fallback = undefined } = this.options;
|
|
3521
3557
|
// Apply phonetic rules to the character
|
|
3522
3558
|
// If no rules apply, use the mapping
|
|
3523
|
-
// If the character is not in the mapping, return
|
|
3524
|
-
const c = this.applyRules(char, i, chars, charLen) ?? map[char] ??
|
|
3559
|
+
// If the character is not in the mapping, return the fallback
|
|
3560
|
+
const c = this.applyRules(char, i, chars, charLen) ?? map[char] ?? fallback;
|
|
3525
3561
|
// De-duplicate the code if necessary
|
|
3526
3562
|
return dedupe && c === lastCode ? undefined : c;
|
|
3527
3563
|
}
|
|
@@ -3674,7 +3710,7 @@ const PhoneticMappingRegistry = (() => {
|
|
|
3674
3710
|
add(algo, id, map, update = false) {
|
|
3675
3711
|
const mappings = maps(algo);
|
|
3676
3712
|
if (!update && id in mappings)
|
|
3677
|
-
throw new Error(`
|
|
3713
|
+
throw new Error(`Entry <${id}> already exists / use <update=true> to overwrite`);
|
|
3678
3714
|
mappings[id] = map;
|
|
3679
3715
|
},
|
|
3680
3716
|
/**
|
|
@@ -3710,6 +3746,188 @@ const PhoneticMappingRegistry = (() => {
|
|
|
3710
3746
|
};
|
|
3711
3747
|
})();
|
|
3712
3748
|
|
|
3749
|
+
/**
|
|
3750
|
+
* Caverphone Phonetic Algorithm
|
|
3751
|
+
* src/phonetic/Caverphone.ts
|
|
3752
|
+
*
|
|
3753
|
+
* @see https://en.wikipedia.org/wiki/Caverphone
|
|
3754
|
+
*
|
|
3755
|
+
* This module implements the Caverphone phonetic algorithm, which is designed
|
|
3756
|
+
* to encode words into a phonetic representation. The Caverphone algorithm is
|
|
3757
|
+
* used primarily in New Zealand and was developed to assist in the indexing of
|
|
3758
|
+
* names in genealogical databases.
|
|
3759
|
+
*
|
|
3760
|
+
* It converts words into a standardized phonetic code, allowing for variations
|
|
3761
|
+
* in spelling and pronunciation to be matched.
|
|
3762
|
+
*
|
|
3763
|
+
* @module Phonetic/Caverphone
|
|
3764
|
+
* @author Paul Köhler (komed3)
|
|
3765
|
+
* @license MIT
|
|
3766
|
+
*/
|
|
3767
|
+
/**
|
|
3768
|
+
* Caverphone class extends the Phonetic class to implement the Caverphone phonetic algorithm.
|
|
3769
|
+
*/
|
|
3770
|
+
class Caverphone extends Phonetic {
|
|
3771
|
+
// Default options for the Caverphone phonetic algorithm
|
|
3772
|
+
static default = {
|
|
3773
|
+
map: 'en2', delimiter: ' ', length: -1, pad: '', dedupe: false
|
|
3774
|
+
};
|
|
3775
|
+
/**
|
|
3776
|
+
* Constructor for the Caverphone class.
|
|
3777
|
+
*
|
|
3778
|
+
* Initializes the Caverphone phonetic algorithm with the mapping and options.
|
|
3779
|
+
*
|
|
3780
|
+
* @param {PhoneticOptions} [opt] - Options for the Caverphone phonetic algorithm
|
|
3781
|
+
*/
|
|
3782
|
+
constructor(opt = {}) { super('caverphone', opt); }
|
|
3783
|
+
/**
|
|
3784
|
+
* Generates the Caverphone code for a given word.
|
|
3785
|
+
*
|
|
3786
|
+
* @param {string} word - The input word to be converted into a Caverphone code
|
|
3787
|
+
* @returns {string} - The generated Caverphone code
|
|
3788
|
+
*/
|
|
3789
|
+
encode(word) {
|
|
3790
|
+
// Remove anything not A-Z and convert to lowercase
|
|
3791
|
+
word = word.replace(/[^A-Z]/gi, '').toLowerCase();
|
|
3792
|
+
// Use the base implementation for rule/mapping application
|
|
3793
|
+
return super.encode(word);
|
|
3794
|
+
}
|
|
3795
|
+
/**
|
|
3796
|
+
* Overrides the mapChar method to skip character mapping.
|
|
3797
|
+
*
|
|
3798
|
+
* @param {string} char - The character to be mapped
|
|
3799
|
+
* @returns {string} - The mapped character
|
|
3800
|
+
*/
|
|
3801
|
+
mapChar(char) { return char; }
|
|
3802
|
+
/**
|
|
3803
|
+
* Adjusts the phonetic code to uppercase.
|
|
3804
|
+
*
|
|
3805
|
+
* @param {string} code - The phonetic code to adjust
|
|
3806
|
+
* @returns {string} - The adjusted phonetic code
|
|
3807
|
+
*/
|
|
3808
|
+
adjustCode(code) { return code.toUpperCase(); }
|
|
3809
|
+
}
|
|
3810
|
+
// Register the Caverphone algorithm in the phonetic registry
|
|
3811
|
+
PhoneticRegistry.add('caverphone', Caverphone);
|
|
3812
|
+
// Register the Caverphone 1.0 phonetic mapping for English
|
|
3813
|
+
PhoneticMappingRegistry.add('caverphone', 'en1', {
|
|
3814
|
+
options: { length: 6, pad: '1' },
|
|
3815
|
+
map: {},
|
|
3816
|
+
patterns: [
|
|
3817
|
+
// Special word-initial replacements
|
|
3818
|
+
{ pattern: /^(c|r|t|en)ough/, replace: '$1ou2f' },
|
|
3819
|
+
{ pattern: /^gn/, replace: '2n' },
|
|
3820
|
+
// Special word-final replacement
|
|
3821
|
+
{ pattern: /mb$/, replace: 'm2' },
|
|
3822
|
+
// Character group replacements
|
|
3823
|
+
{ pattern: /cq/g, replace: '2q' },
|
|
3824
|
+
{ pattern: /c(e|i|y)/g, replace: 's$1' },
|
|
3825
|
+
{ pattern: /tch/g, replace: '2ch' },
|
|
3826
|
+
{ pattern: /[cqx]/g, replace: 'k' },
|
|
3827
|
+
{ pattern: /v/g, replace: 'f' },
|
|
3828
|
+
{ pattern: /dg/g, replace: '2g' },
|
|
3829
|
+
{ pattern: /ti(a|o)/g, replace: 'si$1' },
|
|
3830
|
+
{ pattern: /d/g, replace: 't' },
|
|
3831
|
+
{ pattern: /ph/g, replace: 'fh' },
|
|
3832
|
+
{ pattern: /b/g, replace: 'p' },
|
|
3833
|
+
{ pattern: /sh/g, replace: 's2' },
|
|
3834
|
+
{ pattern: /z/g, replace: 's' },
|
|
3835
|
+
// Vowel handling
|
|
3836
|
+
{ pattern: /^[aeiou]/, replace: 'A' },
|
|
3837
|
+
{ pattern: /[aeiou]/g, replace: '3' },
|
|
3838
|
+
// Special gh handling
|
|
3839
|
+
{ pattern: /3gh3/g, replace: '3kh3' },
|
|
3840
|
+
{ pattern: /gh/g, replace: '22' },
|
|
3841
|
+
// Single character replacements
|
|
3842
|
+
{ pattern: /g/g, replace: 'k' },
|
|
3843
|
+
// Collapse repeated consonants
|
|
3844
|
+
{ pattern: /s+/g, replace: 'S' },
|
|
3845
|
+
{ pattern: /t+/g, replace: 'T' },
|
|
3846
|
+
{ pattern: /p+/g, replace: 'P' },
|
|
3847
|
+
{ pattern: /k+/g, replace: 'K' },
|
|
3848
|
+
{ pattern: /f+/g, replace: 'F' },
|
|
3849
|
+
{ pattern: /m+/g, replace: 'M' },
|
|
3850
|
+
{ pattern: /n+/g, replace: 'N' },
|
|
3851
|
+
// Y and other single-letter handling
|
|
3852
|
+
{ pattern: /j/g, replace: 'y' },
|
|
3853
|
+
// L/R/W/Y3 handling
|
|
3854
|
+
{ pattern: /l3/g, replace: 'L3' },
|
|
3855
|
+
{ pattern: /r3/g, replace: 'R3' },
|
|
3856
|
+
{ pattern: /w3/g, replace: 'W3' },
|
|
3857
|
+
{ pattern: /y3/g, replace: 'Y3' },
|
|
3858
|
+
// L/R/W followed by y
|
|
3859
|
+
{ pattern: /ly/g, replace: 'Ly' },
|
|
3860
|
+
{ pattern: /ry/g, replace: 'Ry' },
|
|
3861
|
+
{ pattern: /wy/g, replace: 'Wy' },
|
|
3862
|
+
// WH handling
|
|
3863
|
+
{ pattern: /wh3/g, replace: 'Wh3' },
|
|
3864
|
+
{ pattern: /why/g, replace: 'Why' },
|
|
3865
|
+
// H at start
|
|
3866
|
+
{ pattern: /^h/, replace: 'A' },
|
|
3867
|
+
// Remove certain letters
|
|
3868
|
+
{ pattern: /[hlrwy23]/g, replace: '' }
|
|
3869
|
+
]
|
|
3870
|
+
});
|
|
3871
|
+
// Register the Caverphone 2.0 phonetic mapping for English
|
|
3872
|
+
PhoneticMappingRegistry.add('caverphone', 'en2', {
|
|
3873
|
+
options: { length: 10, pad: '1' },
|
|
3874
|
+
map: {},
|
|
3875
|
+
patterns: [
|
|
3876
|
+
// Remove trailing 'e'
|
|
3877
|
+
{ pattern: /e$/, replace: '' },
|
|
3878
|
+
// Special word-initial replacements
|
|
3879
|
+
{ pattern: /^(c|r|t|en|tr)ough/, replace: '$1ou2f' },
|
|
3880
|
+
{ pattern: /^gn/, replace: '2n' },
|
|
3881
|
+
// Special word-final replacement
|
|
3882
|
+
{ pattern: /mb$/, replace: 'm2' },
|
|
3883
|
+
// Character group replacements
|
|
3884
|
+
{ pattern: /cq/g, replace: '2q' },
|
|
3885
|
+
{ pattern: /c(e|i|y)/g, replace: 's$1' },
|
|
3886
|
+
{ pattern: /tch/g, replace: '2ch' },
|
|
3887
|
+
{ pattern: /[cqx]/g, replace: 'k' },
|
|
3888
|
+
{ pattern: /v/g, replace: 'f' },
|
|
3889
|
+
{ pattern: /dg/g, replace: '2g' },
|
|
3890
|
+
{ pattern: /ti(a|o)/g, replace: 'si$1' },
|
|
3891
|
+
{ pattern: /d/g, replace: 't' },
|
|
3892
|
+
{ pattern: /ph/g, replace: 'fh' },
|
|
3893
|
+
{ pattern: /b/g, replace: 'p' },
|
|
3894
|
+
{ pattern: /sh/g, replace: 's2' },
|
|
3895
|
+
{ pattern: /z/g, replace: 's' },
|
|
3896
|
+
// Vowel handling
|
|
3897
|
+
{ pattern: /^[aeiou]/, replace: 'A' },
|
|
3898
|
+
{ pattern: /[aeiou]/g, replace: '3' },
|
|
3899
|
+
// Y handling
|
|
3900
|
+
{ pattern: /j/g, replace: 'y' },
|
|
3901
|
+
{ pattern: /^y3/, replace: 'Y3' },
|
|
3902
|
+
{ pattern: /^y/, replace: 'A' },
|
|
3903
|
+
{ pattern: /y/g, replace: '3' },
|
|
3904
|
+
// Special gh handling
|
|
3905
|
+
{ pattern: /3gh3/g, replace: '3kh3' },
|
|
3906
|
+
{ pattern: /gh/g, replace: '22' },
|
|
3907
|
+
// Single character replacements
|
|
3908
|
+
{ pattern: /g/g, replace: 'k' },
|
|
3909
|
+
// Collapse repeated consonants
|
|
3910
|
+
{ pattern: /s+/g, replace: 'S' },
|
|
3911
|
+
{ pattern: /t+/g, replace: 'T' },
|
|
3912
|
+
{ pattern: /p+/g, replace: 'P' },
|
|
3913
|
+
{ pattern: /k+/g, replace: 'K' },
|
|
3914
|
+
{ pattern: /f+/g, replace: 'F' },
|
|
3915
|
+
{ pattern: /m+/g, replace: 'M' },
|
|
3916
|
+
{ pattern: /n+/g, replace: 'N' },
|
|
3917
|
+
// L/R/W3 handling
|
|
3918
|
+
{ pattern: /l3/g, replace: 'L3' },
|
|
3919
|
+
{ pattern: /r3/g, replace: 'R3' },
|
|
3920
|
+
{ pattern: /w3/g, replace: 'W3' },
|
|
3921
|
+
{ pattern: /wh3/g, replace: 'Wh3' },
|
|
3922
|
+
{ pattern: /[lrw]$/, replace: '3' },
|
|
3923
|
+
// H at start and final 3 handling
|
|
3924
|
+
{ pattern: /^h/, replace: 'A' },
|
|
3925
|
+
{ pattern: /3$/, replace: 'A' },
|
|
3926
|
+
// Remove certain letters
|
|
3927
|
+
{ pattern: /[hlrw23]/g, replace: '' }
|
|
3928
|
+
]
|
|
3929
|
+
});
|
|
3930
|
+
|
|
3713
3931
|
/**
|
|
3714
3932
|
* Cologne Phonetic Algorithm
|
|
3715
3933
|
* src/phonetic/Cologne.ts
|