cmpstr 3.0.0 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -4
- package/dist/CmpStr.esm.js +242 -24
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +242 -24
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/{CmpStr.js → CmpStr.cjs} +26 -25
- package/dist/cjs/CmpStr.cjs.map +1 -0
- package/dist/cjs/{CmpStrAsync.js → CmpStrAsync.cjs} +6 -6
- package/dist/cjs/CmpStrAsync.cjs.map +1 -0
- package/dist/cjs/index.cjs +15 -0
- package/dist/cjs/index.cjs.map +1 -0
- package/dist/cjs/metric/{Cosine.js → Cosine.cjs} +4 -4
- package/dist/cjs/metric/Cosine.cjs.map +1 -0
- package/dist/cjs/metric/{DamerauLevenshtein.js → DamerauLevenshtein.cjs} +4 -4
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -0
- package/dist/cjs/metric/{DiceSorensen.js → DiceSorensen.cjs} +4 -4
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -0
- package/dist/cjs/metric/{Hamming.js → Hamming.cjs} +4 -4
- package/dist/cjs/metric/Hamming.cjs.map +1 -0
- package/dist/cjs/metric/{Jaccard.js → Jaccard.cjs} +4 -4
- package/dist/cjs/metric/Jaccard.cjs.map +1 -0
- package/dist/cjs/metric/{JaroWinkler.js → JaroWinkler.cjs} +4 -4
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -0
- package/dist/cjs/metric/{LCS.js → LCS.cjs} +4 -4
- package/dist/cjs/metric/LCS.cjs.map +1 -0
- package/dist/cjs/metric/{Levenshtein.js → Levenshtein.cjs} +4 -4
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -0
- package/dist/cjs/metric/{Metric.js → Metric.cjs} +9 -9
- package/dist/cjs/metric/Metric.cjs.map +1 -0
- package/dist/cjs/metric/{NeedlemanWunsch.js → NeedlemanWunsch.cjs} +4 -4
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -0
- package/dist/cjs/metric/{SmithWaterman.js → SmithWaterman.cjs} +4 -4
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -0
- package/dist/cjs/metric/{qGram.js → qGram.cjs} +4 -4
- package/dist/cjs/metric/qGram.cjs.map +1 -0
- package/dist/cjs/phonetic/Caverphone.cjs +199 -0
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -0
- package/dist/cjs/phonetic/{Cologne.js → Cologne.cjs} +3 -3
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -0
- package/dist/cjs/phonetic/{Metaphone.js → Metaphone.cjs} +3 -3
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -0
- package/dist/cjs/phonetic/{Phonetic.js → Phonetic.cjs} +55 -21
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -0
- package/dist/cjs/phonetic/{Soundex.js → Soundex.cjs} +3 -3
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -0
- package/dist/cjs/utils/{DeepMerge.js → DeepMerge.cjs} +3 -3
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -0
- package/dist/cjs/utils/{DiffChecker.js → DiffChecker.cjs} +11 -11
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -0
- package/dist/cjs/utils/{Filter.js → Filter.cjs} +2 -2
- package/dist/cjs/utils/Filter.cjs.map +1 -0
- package/dist/cjs/utils/{HashTable.js → HashTable.cjs} +2 -2
- package/dist/cjs/utils/HashTable.cjs.map +1 -0
- package/dist/cjs/utils/{Normalizer.js → Normalizer.cjs} +3 -3
- package/dist/cjs/utils/Normalizer.cjs.map +1 -0
- package/dist/cjs/utils/{Pool.js → Pool.cjs} +2 -2
- package/dist/cjs/utils/Pool.cjs.map +1 -0
- package/dist/cjs/utils/{Profiler.js → Profiler.cjs} +4 -4
- package/dist/cjs/utils/Profiler.cjs.map +1 -0
- package/dist/cjs/utils/{Registry.js → Registry.cjs} +8 -8
- package/dist/cjs/utils/Registry.cjs.map +1 -0
- package/dist/cjs/utils/{TextAnalyzer.js → TextAnalyzer.cjs} +3 -3
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -0
- package/dist/esm/{CmpStr.js → CmpStr.mjs} +26 -25
- package/dist/esm/CmpStr.mjs.map +1 -0
- package/dist/esm/{CmpStrAsync.js → CmpStrAsync.mjs} +6 -6
- package/dist/esm/CmpStrAsync.mjs.map +1 -0
- package/dist/esm/index.mjs +7 -0
- package/dist/esm/index.mjs.map +1 -0
- package/dist/esm/metric/{Cosine.js → Cosine.mjs} +4 -4
- package/dist/esm/metric/Cosine.mjs.map +1 -0
- package/dist/esm/metric/{DamerauLevenshtein.js → DamerauLevenshtein.mjs} +4 -4
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -0
- package/dist/esm/metric/{DiceSorensen.js → DiceSorensen.mjs} +4 -4
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -0
- package/dist/esm/metric/{Hamming.js → Hamming.mjs} +4 -4
- package/dist/esm/metric/Hamming.mjs.map +1 -0
- package/dist/esm/metric/{Jaccard.js → Jaccard.mjs} +4 -4
- package/dist/esm/metric/Jaccard.mjs.map +1 -0
- package/dist/esm/metric/{JaroWinkler.js → JaroWinkler.mjs} +4 -4
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -0
- package/dist/esm/metric/{LCS.js → LCS.mjs} +4 -4
- package/dist/esm/metric/LCS.mjs.map +1 -0
- package/dist/esm/metric/{Levenshtein.js → Levenshtein.mjs} +4 -4
- package/dist/esm/metric/Levenshtein.mjs.map +1 -0
- package/dist/esm/metric/{Metric.js → Metric.mjs} +9 -9
- package/dist/esm/metric/Metric.mjs.map +1 -0
- package/dist/esm/metric/{NeedlemanWunsch.js → NeedlemanWunsch.mjs} +4 -4
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -0
- package/dist/esm/metric/{SmithWaterman.js → SmithWaterman.mjs} +4 -4
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -0
- package/dist/esm/metric/{qGram.js → qGram.mjs} +4 -4
- package/dist/esm/metric/qGram.mjs.map +1 -0
- package/dist/esm/phonetic/Caverphone.mjs +201 -0
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -0
- package/dist/esm/phonetic/{Cologne.js → Cologne.mjs} +3 -3
- package/dist/esm/phonetic/Cologne.mjs.map +1 -0
- package/dist/esm/phonetic/{Metaphone.js → Metaphone.mjs} +3 -3
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -0
- package/dist/esm/phonetic/{Phonetic.js → Phonetic.mjs} +52 -21
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -0
- package/dist/esm/phonetic/{Soundex.js → Soundex.mjs} +3 -3
- package/dist/esm/phonetic/Soundex.mjs.map +1 -0
- package/dist/esm/utils/{DeepMerge.js → DeepMerge.mjs} +3 -3
- package/dist/esm/utils/DeepMerge.mjs.map +1 -0
- package/dist/esm/utils/{DiffChecker.js → DiffChecker.mjs} +11 -11
- package/dist/esm/utils/DiffChecker.mjs.map +1 -0
- package/dist/esm/utils/{Filter.js → Filter.mjs} +2 -2
- package/dist/esm/utils/Filter.mjs.map +1 -0
- package/dist/esm/utils/{HashTable.js → HashTable.mjs} +2 -2
- package/dist/esm/utils/HashTable.mjs.map +1 -0
- package/dist/esm/utils/{Normalizer.js → Normalizer.mjs} +3 -3
- package/dist/esm/utils/Normalizer.mjs.map +1 -0
- package/dist/esm/utils/{Pool.js → Pool.mjs} +2 -2
- package/dist/esm/utils/Pool.mjs.map +1 -0
- package/dist/esm/utils/{Profiler.js → Profiler.mjs} +4 -4
- package/dist/esm/utils/Profiler.mjs.map +1 -0
- package/dist/esm/utils/{Registry.js → Registry.mjs} +8 -8
- package/dist/esm/utils/Registry.mjs.map +1 -0
- package/dist/esm/utils/{TextAnalyzer.js → TextAnalyzer.mjs} +3 -3
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -0
- package/dist/types/index.d.ts +2 -2
- package/dist/types/phonetic/Caverphone.d.ts +55 -0
- package/dist/types/phonetic/Phonetic.d.ts +14 -2
- package/dist/types/phonetic/index.d.ts +1 -0
- package/dist/types/utils/Types.d.ts +11 -0
- package/package.json +21 -16
- package/dist/cjs/CmpStr.js.map +0 -1
- package/dist/cjs/CmpStrAsync.js.map +0 -1
- package/dist/cjs/index.js +0 -15
- package/dist/cjs/index.js.map +0 -1
- package/dist/cjs/metric/Cosine.js.map +0 -1
- package/dist/cjs/metric/DamerauLevenshtein.js.map +0 -1
- package/dist/cjs/metric/DiceSorensen.js.map +0 -1
- package/dist/cjs/metric/Hamming.js.map +0 -1
- package/dist/cjs/metric/Jaccard.js.map +0 -1
- package/dist/cjs/metric/JaroWinkler.js.map +0 -1
- package/dist/cjs/metric/LCS.js.map +0 -1
- package/dist/cjs/metric/Levenshtein.js.map +0 -1
- package/dist/cjs/metric/Metric.js.map +0 -1
- package/dist/cjs/metric/NeedlemanWunsch.js.map +0 -1
- package/dist/cjs/metric/SmithWaterman.js.map +0 -1
- package/dist/cjs/metric/qGram.js.map +0 -1
- package/dist/cjs/phonetic/Cologne.js.map +0 -1
- package/dist/cjs/phonetic/Metaphone.js.map +0 -1
- package/dist/cjs/phonetic/Phonetic.js.map +0 -1
- package/dist/cjs/phonetic/Soundex.js.map +0 -1
- package/dist/cjs/utils/DeepMerge.js.map +0 -1
- package/dist/cjs/utils/DiffChecker.js.map +0 -1
- package/dist/cjs/utils/Filter.js.map +0 -1
- package/dist/cjs/utils/HashTable.js.map +0 -1
- package/dist/cjs/utils/Normalizer.js.map +0 -1
- package/dist/cjs/utils/Pool.js.map +0 -1
- package/dist/cjs/utils/Profiler.js.map +0 -1
- package/dist/cjs/utils/Registry.js.map +0 -1
- package/dist/cjs/utils/TextAnalyzer.js.map +0 -1
- package/dist/esm/CmpStr.js.map +0 -1
- package/dist/esm/CmpStrAsync.js.map +0 -1
- package/dist/esm/index.js +0 -7
- package/dist/esm/index.js.map +0 -1
- package/dist/esm/metric/Cosine.js.map +0 -1
- package/dist/esm/metric/DamerauLevenshtein.js.map +0 -1
- package/dist/esm/metric/DiceSorensen.js.map +0 -1
- package/dist/esm/metric/Hamming.js.map +0 -1
- package/dist/esm/metric/Jaccard.js.map +0 -1
- package/dist/esm/metric/JaroWinkler.js.map +0 -1
- package/dist/esm/metric/LCS.js.map +0 -1
- package/dist/esm/metric/Levenshtein.js.map +0 -1
- package/dist/esm/metric/Metric.js.map +0 -1
- package/dist/esm/metric/NeedlemanWunsch.js.map +0 -1
- package/dist/esm/metric/SmithWaterman.js.map +0 -1
- package/dist/esm/metric/qGram.js.map +0 -1
- package/dist/esm/phonetic/Cologne.js.map +0 -1
- package/dist/esm/phonetic/Metaphone.js.map +0 -1
- package/dist/esm/phonetic/Phonetic.js.map +0 -1
- package/dist/esm/phonetic/Soundex.js.map +0 -1
- package/dist/esm/utils/DeepMerge.js.map +0 -1
- package/dist/esm/utils/DiffChecker.js.map +0 -1
- package/dist/esm/utils/Filter.js.map +0 -1
- package/dist/esm/utils/HashTable.js.map +0 -1
- package/dist/esm/utils/Normalizer.js.map +0 -1
- package/dist/esm/utils/Pool.js.map +0 -1
- package/dist/esm/utils/Profiler.js.map +0 -1
- package/dist/esm/utils/Registry.js.map +0 -1
- package/dist/esm/utils/TextAnalyzer.js.map +0 -1
package/dist/CmpStr.umd.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* CmpStr v3.0.
|
|
2
|
+
* CmpStr v3.0.2 build-522ae69-250720
|
|
3
3
|
* This is a lightweight, fast and well performing library for calculating string similarity.
|
|
4
4
|
* (c) 2023-2025 Paul Köhler @komed3 / MIT License
|
|
5
5
|
* Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
|
|
@@ -67,7 +67,7 @@
|
|
|
67
67
|
const [k, ...r] = parse(path);
|
|
68
68
|
// Throw an error if the key is not a valid identifier
|
|
69
69
|
if (t !== undefined && (typeof t !== 'object' || t === null))
|
|
70
|
-
throw Error(`
|
|
70
|
+
throw Error(`Cannot set property <${k}> of <${JSON.stringify(t)}>`);
|
|
71
71
|
// Assign the value to the specified key in the object
|
|
72
72
|
return Object.assign(t ?? (typeof k === 'number' ? [] : Object.create(null)), {
|
|
73
73
|
[k]: set(t?.[k], r.join('.'), value)
|
|
@@ -1686,7 +1686,7 @@
|
|
|
1686
1686
|
function Registry(reg, ctor) {
|
|
1687
1687
|
// Throws an error if the registry already exists
|
|
1688
1688
|
if (reg in registry || reg in factory)
|
|
1689
|
-
throw new Error(`
|
|
1689
|
+
throw new Error(`Registry <${reg}> already exists / overwriting is forbidden`);
|
|
1690
1690
|
// Create a registry object to hold class constructors
|
|
1691
1691
|
const classes = Object.create(null);
|
|
1692
1692
|
const service = {
|
|
@@ -1701,9 +1701,9 @@
|
|
|
1701
1701
|
*/
|
|
1702
1702
|
add(name, cls, update = false) {
|
|
1703
1703
|
if (!(cls.prototype instanceof ctor))
|
|
1704
|
-
throw new TypeError(`
|
|
1704
|
+
throw new TypeError(`Class must extend <${reg}>`);
|
|
1705
1705
|
if (!update && name in classes)
|
|
1706
|
-
throw new Error(`
|
|
1706
|
+
throw new Error(`Entry <${name}> already exists / use <update=true> to overwrite`);
|
|
1707
1707
|
classes[name] = cls;
|
|
1708
1708
|
},
|
|
1709
1709
|
/**
|
|
@@ -1734,7 +1734,7 @@
|
|
|
1734
1734
|
*/
|
|
1735
1735
|
get(name) {
|
|
1736
1736
|
if (!(name in classes))
|
|
1737
|
-
throw new Error(`
|
|
1737
|
+
throw new Error(`Class <${name}> not registered for <${reg}>`);
|
|
1738
1738
|
return classes[name];
|
|
1739
1739
|
}
|
|
1740
1740
|
};
|
|
@@ -1755,7 +1755,7 @@
|
|
|
1755
1755
|
*/
|
|
1756
1756
|
function resolveCls(reg, cls) {
|
|
1757
1757
|
if (!(reg in registry))
|
|
1758
|
-
throw new ReferenceError(`
|
|
1758
|
+
throw new ReferenceError(`Registry <${reg}> does not exist`);
|
|
1759
1759
|
return (typeof cls === 'string' ? registry[reg]?.get(cls) : cls);
|
|
1760
1760
|
}
|
|
1761
1761
|
/**
|
|
@@ -1773,7 +1773,7 @@
|
|
|
1773
1773
|
return new cls(...args);
|
|
1774
1774
|
}
|
|
1775
1775
|
catch (err) {
|
|
1776
|
-
throw new Error(`
|
|
1776
|
+
throw new Error(`Cannot instantiate class <${cls}>`);
|
|
1777
1777
|
}
|
|
1778
1778
|
}
|
|
1779
1779
|
|
|
@@ -1908,7 +1908,7 @@
|
|
|
1908
1908
|
* @throws {Error} - If not overridden in a subclass
|
|
1909
1909
|
*/
|
|
1910
1910
|
compute(a, b, m, n, maxLen) {
|
|
1911
|
-
throw new Error(`
|
|
1911
|
+
throw new Error(`Method compute() must be overridden in a subclass`);
|
|
1912
1912
|
}
|
|
1913
1913
|
/**
|
|
1914
1914
|
* Run the metric computation for single inputs (two strings).
|
|
@@ -2065,7 +2065,7 @@
|
|
|
2065
2065
|
*/
|
|
2066
2066
|
isPairwise(safe = false) {
|
|
2067
2067
|
return this.isBatch() && this.a.length === this.b.length ? true : !safe && (() => {
|
|
2068
|
-
throw new Error(`
|
|
2068
|
+
throw new Error(`Mode <pairwise> requires arrays of equal length`);
|
|
2069
2069
|
})();
|
|
2070
2070
|
}
|
|
2071
2071
|
/**
|
|
@@ -2126,7 +2126,7 @@
|
|
|
2126
2126
|
this.runPairwise();
|
|
2127
2127
|
break;
|
|
2128
2128
|
// Unsupported mode
|
|
2129
|
-
default: throw new Error(`
|
|
2129
|
+
default: throw new Error(`Unsupported mode <${mode}>`);
|
|
2130
2130
|
}
|
|
2131
2131
|
}
|
|
2132
2132
|
/**
|
|
@@ -2161,7 +2161,7 @@
|
|
|
2161
2161
|
await this.runPairwiseAsync();
|
|
2162
2162
|
break;
|
|
2163
2163
|
// Unsupported mode
|
|
2164
|
-
default: throw new Error(`
|
|
2164
|
+
default: throw new Error(`Unsupported async mode <${mode}>`);
|
|
2165
2165
|
}
|
|
2166
2166
|
}
|
|
2167
2167
|
/**
|
|
@@ -2704,7 +2704,7 @@
|
|
|
2704
2704
|
}
|
|
2705
2705
|
// Standard: Error for unequal length
|
|
2706
2706
|
else
|
|
2707
|
-
throw new Error(`
|
|
2707
|
+
throw new Error(`Strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
|
|
2708
2708
|
`use option.pad for automatic adjustment`);
|
|
2709
2709
|
}
|
|
2710
2710
|
// Calculate the Hamming distance
|
|
@@ -3353,8 +3353,8 @@
|
|
|
3353
3353
|
* pose a risk of infringing upon existing trademarks due to their pronunciation.
|
|
3354
3354
|
*
|
|
3355
3355
|
* This module provides an abstract class for generating phonetic indices based
|
|
3356
|
-
* on mappings and rules. It allows for the implementation of various
|
|
3357
|
-
* algorithms by extending the abstract class.
|
|
3356
|
+
* on mappings, patterns and rules. It allows for the implementation of various
|
|
3357
|
+
* phonetic algorithms by extending the abstract class.
|
|
3358
3358
|
*
|
|
3359
3359
|
* @module Phonetic
|
|
3360
3360
|
* @author Paul Köhler (komed3)
|
|
@@ -3394,22 +3394,55 @@
|
|
|
3394
3394
|
* Constructor for the Phonetic class.
|
|
3395
3395
|
*
|
|
3396
3396
|
* Initializes the phonetic algorithm with the specified options and mapping.
|
|
3397
|
+
* Options hierarchy: User input > mapping options > default
|
|
3397
3398
|
*
|
|
3398
3399
|
* @param {string} algo - The name of the algorithm (e.g. 'soundex')
|
|
3399
3400
|
* @param {PhoneticOptions} [opt] - Options for the phonetic algorithm
|
|
3400
3401
|
* @throws {Error} - If the requested mapping is not declared
|
|
3401
3402
|
*/
|
|
3402
3403
|
constructor(algo, opt = {}) {
|
|
3403
|
-
//
|
|
3404
|
-
|
|
3405
|
-
//
|
|
3406
|
-
const
|
|
3404
|
+
// Get the phonetic default options
|
|
3405
|
+
const defaults = this.constructor.default ?? {};
|
|
3406
|
+
// Determine phonetic map ID from options or use defaults
|
|
3407
|
+
const mapId = opt.map ?? defaults.map;
|
|
3408
|
+
// If no mapping is specified (neither in the options nor in the defaults), throw an error
|
|
3409
|
+
if (!mapId)
|
|
3410
|
+
throw new Error(`No mapping specified for phonetic algorithm`);
|
|
3411
|
+
// Get the mapping based on the determined map ID
|
|
3412
|
+
const map = PhoneticMappingRegistry.get(algo, mapId);
|
|
3407
3413
|
// If the mapping is not defined, throw an error
|
|
3408
3414
|
if (map === undefined)
|
|
3409
|
-
throw new Error(`
|
|
3415
|
+
throw new Error(`Requested mapping <${mapId}> is not declared`);
|
|
3416
|
+
// Set the options by merging in order of precedence: defaults < mapping options < provided options
|
|
3417
|
+
this.options = merge(merge(defaults, map.options ?? {}), opt);
|
|
3418
|
+
// Set the algorithm name and mapping
|
|
3410
3419
|
this.algo = algo;
|
|
3411
3420
|
this.map = map;
|
|
3412
3421
|
}
|
|
3422
|
+
/**
|
|
3423
|
+
* Applies patterns to a word based on the phonetic map.
|
|
3424
|
+
*
|
|
3425
|
+
* This method processes the word by applying all defined patterns in the
|
|
3426
|
+
* phonetic map. It replaces occurrences of specified patterns with their
|
|
3427
|
+
* corresponding replacements.
|
|
3428
|
+
*
|
|
3429
|
+
* @param {string} word - The input word to be processed
|
|
3430
|
+
* @returns {string} - The modified word after applying all patterns
|
|
3431
|
+
*/
|
|
3432
|
+
applyPattern(word) {
|
|
3433
|
+
const { patterns = [] } = this.map;
|
|
3434
|
+
// If no patterns are provided, return the input
|
|
3435
|
+
if (!patterns || !patterns.length)
|
|
3436
|
+
return word;
|
|
3437
|
+
// Iterate over the patterns and replace all matches
|
|
3438
|
+
for (const { pattern, replace, all = false } of patterns) {
|
|
3439
|
+
// Search for the pattern in the word and replace it
|
|
3440
|
+
// Use replaceAll if 'all' is true, otherwise use replace
|
|
3441
|
+
word = word[all ? 'replaceAll' : 'replace'](pattern, replace);
|
|
3442
|
+
}
|
|
3443
|
+
// Return the modified word after applying all patterns
|
|
3444
|
+
return word;
|
|
3445
|
+
}
|
|
3413
3446
|
/**
|
|
3414
3447
|
* Applies phonetic rules to a character in a word context.
|
|
3415
3448
|
*
|
|
@@ -3487,6 +3520,9 @@
|
|
|
3487
3520
|
*/
|
|
3488
3521
|
encode(word) {
|
|
3489
3522
|
const { map = {}, ignore = [] } = this.map;
|
|
3523
|
+
// Apply patterns to the word before processing
|
|
3524
|
+
// This allows for pre-processing of the word based on defined patterns
|
|
3525
|
+
word = this.applyPattern(word);
|
|
3490
3526
|
// Get the characters of the word and its length
|
|
3491
3527
|
const chars = this.word2Chars(word);
|
|
3492
3528
|
const charLen = chars.length;
|
|
@@ -3523,11 +3559,11 @@
|
|
|
3523
3559
|
* @returns {string|undefined} - The phonetic code or undefined if no code applies
|
|
3524
3560
|
*/
|
|
3525
3561
|
mapChar(char, i, chars, charLen, lastCode, map) {
|
|
3526
|
-
const { dedupe = true } = this.options;
|
|
3562
|
+
const { dedupe = true, fallback = undefined } = this.options;
|
|
3527
3563
|
// Apply phonetic rules to the character
|
|
3528
3564
|
// If no rules apply, use the mapping
|
|
3529
|
-
// If the character is not in the mapping, return
|
|
3530
|
-
const c = this.applyRules(char, i, chars, charLen) ?? map[char] ??
|
|
3565
|
+
// If the character is not in the mapping, return the fallback
|
|
3566
|
+
const c = this.applyRules(char, i, chars, charLen) ?? map[char] ?? fallback;
|
|
3531
3567
|
// De-duplicate the code if necessary
|
|
3532
3568
|
return dedupe && c === lastCode ? undefined : c;
|
|
3533
3569
|
}
|
|
@@ -3680,7 +3716,7 @@
|
|
|
3680
3716
|
add(algo, id, map, update = false) {
|
|
3681
3717
|
const mappings = maps(algo);
|
|
3682
3718
|
if (!update && id in mappings)
|
|
3683
|
-
throw new Error(`
|
|
3719
|
+
throw new Error(`Entry <${id}> already exists / use <update=true> to overwrite`);
|
|
3684
3720
|
mappings[id] = map;
|
|
3685
3721
|
},
|
|
3686
3722
|
/**
|
|
@@ -3716,6 +3752,188 @@
|
|
|
3716
3752
|
};
|
|
3717
3753
|
})();
|
|
3718
3754
|
|
|
3755
|
+
/**
|
|
3756
|
+
* Caverphone Phonetic Algorithm
|
|
3757
|
+
* src/phonetic/Caverphone.ts
|
|
3758
|
+
*
|
|
3759
|
+
* @see https://en.wikipedia.org/wiki/Caverphone
|
|
3760
|
+
*
|
|
3761
|
+
* This module implements the Caverphone phonetic algorithm, which is designed
|
|
3762
|
+
* to encode words into a phonetic representation. The Caverphone algorithm is
|
|
3763
|
+
* used primarily in New Zealand and was developed to assist in the indexing of
|
|
3764
|
+
* names in genealogical databases.
|
|
3765
|
+
*
|
|
3766
|
+
* It converts words into a standardized phonetic code, allowing for variations
|
|
3767
|
+
* in spelling and pronunciation to be matched.
|
|
3768
|
+
*
|
|
3769
|
+
* @module Phonetic/Caverphone
|
|
3770
|
+
* @author Paul Köhler (komed3)
|
|
3771
|
+
* @license MIT
|
|
3772
|
+
*/
|
|
3773
|
+
/**
|
|
3774
|
+
* Caverphone class extends the Phonetic class to implement the Caverphone phonetic algorithm.
|
|
3775
|
+
*/
|
|
3776
|
+
class Caverphone extends Phonetic {
|
|
3777
|
+
// Default options for the Caverphone phonetic algorithm
|
|
3778
|
+
static default = {
|
|
3779
|
+
map: 'en2', delimiter: ' ', length: -1, pad: '', dedupe: false
|
|
3780
|
+
};
|
|
3781
|
+
/**
|
|
3782
|
+
* Constructor for the Caverphone class.
|
|
3783
|
+
*
|
|
3784
|
+
* Initializes the Caverphone phonetic algorithm with the mapping and options.
|
|
3785
|
+
*
|
|
3786
|
+
* @param {PhoneticOptions} [opt] - Options for the Caverphone phonetic algorithm
|
|
3787
|
+
*/
|
|
3788
|
+
constructor(opt = {}) { super('caverphone', opt); }
|
|
3789
|
+
/**
|
|
3790
|
+
* Generates the Caverphone code for a given word.
|
|
3791
|
+
*
|
|
3792
|
+
* @param {string} word - The input word to be converted into a Caverphone code
|
|
3793
|
+
* @returns {string} - The generated Caverphone code
|
|
3794
|
+
*/
|
|
3795
|
+
encode(word) {
|
|
3796
|
+
// Remove anything not A-Z and convert to lowercase
|
|
3797
|
+
word = word.replace(/[^A-Z]/gi, '').toLowerCase();
|
|
3798
|
+
// Use the base implementation for rule/mapping application
|
|
3799
|
+
return super.encode(word);
|
|
3800
|
+
}
|
|
3801
|
+
/**
|
|
3802
|
+
* Overrides the mapChar method to skip character mapping.
|
|
3803
|
+
*
|
|
3804
|
+
* @param {string} char - The character to be mapped
|
|
3805
|
+
* @returns {string} - The mapped character
|
|
3806
|
+
*/
|
|
3807
|
+
mapChar(char) { return char; }
|
|
3808
|
+
/**
|
|
3809
|
+
* Adjusts the phonetic code to uppercase.
|
|
3810
|
+
*
|
|
3811
|
+
* @param {string} code - The phonetic code to adjust
|
|
3812
|
+
* @returns {string} - The adjusted phonetic code
|
|
3813
|
+
*/
|
|
3814
|
+
adjustCode(code) { return code.toUpperCase(); }
|
|
3815
|
+
}
|
|
3816
|
+
// Register the Caverphone algorithm in the phonetic registry
|
|
3817
|
+
PhoneticRegistry.add('caverphone', Caverphone);
|
|
3818
|
+
// Register the Caverphone 1.0 phonetic mapping for English
|
|
3819
|
+
PhoneticMappingRegistry.add('caverphone', 'en1', {
|
|
3820
|
+
options: { length: 6, pad: '1' },
|
|
3821
|
+
map: {},
|
|
3822
|
+
patterns: [
|
|
3823
|
+
// Special word-initial replacements
|
|
3824
|
+
{ pattern: /^(c|r|t|en)ough/, replace: '$1ou2f' },
|
|
3825
|
+
{ pattern: /^gn/, replace: '2n' },
|
|
3826
|
+
// Special word-final replacement
|
|
3827
|
+
{ pattern: /mb$/, replace: 'm2' },
|
|
3828
|
+
// Character group replacements
|
|
3829
|
+
{ pattern: /cq/g, replace: '2q' },
|
|
3830
|
+
{ pattern: /c(e|i|y)/g, replace: 's$1' },
|
|
3831
|
+
{ pattern: /tch/g, replace: '2ch' },
|
|
3832
|
+
{ pattern: /[cqx]/g, replace: 'k' },
|
|
3833
|
+
{ pattern: /v/g, replace: 'f' },
|
|
3834
|
+
{ pattern: /dg/g, replace: '2g' },
|
|
3835
|
+
{ pattern: /ti(a|o)/g, replace: 'si$1' },
|
|
3836
|
+
{ pattern: /d/g, replace: 't' },
|
|
3837
|
+
{ pattern: /ph/g, replace: 'fh' },
|
|
3838
|
+
{ pattern: /b/g, replace: 'p' },
|
|
3839
|
+
{ pattern: /sh/g, replace: 's2' },
|
|
3840
|
+
{ pattern: /z/g, replace: 's' },
|
|
3841
|
+
// Vowel handling
|
|
3842
|
+
{ pattern: /^[aeiou]/, replace: 'A' },
|
|
3843
|
+
{ pattern: /[aeiou]/g, replace: '3' },
|
|
3844
|
+
// Special gh handling
|
|
3845
|
+
{ pattern: /3gh3/g, replace: '3kh3' },
|
|
3846
|
+
{ pattern: /gh/g, replace: '22' },
|
|
3847
|
+
// Single character replacements
|
|
3848
|
+
{ pattern: /g/g, replace: 'k' },
|
|
3849
|
+
// Collapse repeated consonants
|
|
3850
|
+
{ pattern: /s+/g, replace: 'S' },
|
|
3851
|
+
{ pattern: /t+/g, replace: 'T' },
|
|
3852
|
+
{ pattern: /p+/g, replace: 'P' },
|
|
3853
|
+
{ pattern: /k+/g, replace: 'K' },
|
|
3854
|
+
{ pattern: /f+/g, replace: 'F' },
|
|
3855
|
+
{ pattern: /m+/g, replace: 'M' },
|
|
3856
|
+
{ pattern: /n+/g, replace: 'N' },
|
|
3857
|
+
// Y and other single-letter handling
|
|
3858
|
+
{ pattern: /j/g, replace: 'y' },
|
|
3859
|
+
// L/R/W/Y3 handling
|
|
3860
|
+
{ pattern: /l3/g, replace: 'L3' },
|
|
3861
|
+
{ pattern: /r3/g, replace: 'R3' },
|
|
3862
|
+
{ pattern: /w3/g, replace: 'W3' },
|
|
3863
|
+
{ pattern: /y3/g, replace: 'Y3' },
|
|
3864
|
+
// L/R/W followed by y
|
|
3865
|
+
{ pattern: /ly/g, replace: 'Ly' },
|
|
3866
|
+
{ pattern: /ry/g, replace: 'Ry' },
|
|
3867
|
+
{ pattern: /wy/g, replace: 'Wy' },
|
|
3868
|
+
// WH handling
|
|
3869
|
+
{ pattern: /wh3/g, replace: 'Wh3' },
|
|
3870
|
+
{ pattern: /why/g, replace: 'Why' },
|
|
3871
|
+
// H at start
|
|
3872
|
+
{ pattern: /^h/, replace: 'A' },
|
|
3873
|
+
// Remove certain letters
|
|
3874
|
+
{ pattern: /[hlrwy23]/g, replace: '' }
|
|
3875
|
+
]
|
|
3876
|
+
});
|
|
3877
|
+
// Register the Caverphone 2.0 phonetic mapping for English
|
|
3878
|
+
PhoneticMappingRegistry.add('caverphone', 'en2', {
|
|
3879
|
+
options: { length: 10, pad: '1' },
|
|
3880
|
+
map: {},
|
|
3881
|
+
patterns: [
|
|
3882
|
+
// Remove trailing 'e'
|
|
3883
|
+
{ pattern: /e$/, replace: '' },
|
|
3884
|
+
// Special word-initial replacements
|
|
3885
|
+
{ pattern: /^(c|r|t|en|tr)ough/, replace: '$1ou2f' },
|
|
3886
|
+
{ pattern: /^gn/, replace: '2n' },
|
|
3887
|
+
// Special word-final replacement
|
|
3888
|
+
{ pattern: /mb$/, replace: 'm2' },
|
|
3889
|
+
// Character group replacements
|
|
3890
|
+
{ pattern: /cq/g, replace: '2q' },
|
|
3891
|
+
{ pattern: /c(e|i|y)/g, replace: 's$1' },
|
|
3892
|
+
{ pattern: /tch/g, replace: '2ch' },
|
|
3893
|
+
{ pattern: /[cqx]/g, replace: 'k' },
|
|
3894
|
+
{ pattern: /v/g, replace: 'f' },
|
|
3895
|
+
{ pattern: /dg/g, replace: '2g' },
|
|
3896
|
+
{ pattern: /ti(a|o)/g, replace: 'si$1' },
|
|
3897
|
+
{ pattern: /d/g, replace: 't' },
|
|
3898
|
+
{ pattern: /ph/g, replace: 'fh' },
|
|
3899
|
+
{ pattern: /b/g, replace: 'p' },
|
|
3900
|
+
{ pattern: /sh/g, replace: 's2' },
|
|
3901
|
+
{ pattern: /z/g, replace: 's' },
|
|
3902
|
+
// Vowel handling
|
|
3903
|
+
{ pattern: /^[aeiou]/, replace: 'A' },
|
|
3904
|
+
{ pattern: /[aeiou]/g, replace: '3' },
|
|
3905
|
+
// Y handling
|
|
3906
|
+
{ pattern: /j/g, replace: 'y' },
|
|
3907
|
+
{ pattern: /^y3/, replace: 'Y3' },
|
|
3908
|
+
{ pattern: /^y/, replace: 'A' },
|
|
3909
|
+
{ pattern: /y/g, replace: '3' },
|
|
3910
|
+
// Special gh handling
|
|
3911
|
+
{ pattern: /3gh3/g, replace: '3kh3' },
|
|
3912
|
+
{ pattern: /gh/g, replace: '22' },
|
|
3913
|
+
// Single character replacements
|
|
3914
|
+
{ pattern: /g/g, replace: 'k' },
|
|
3915
|
+
// Collapse repeated consonants
|
|
3916
|
+
{ pattern: /s+/g, replace: 'S' },
|
|
3917
|
+
{ pattern: /t+/g, replace: 'T' },
|
|
3918
|
+
{ pattern: /p+/g, replace: 'P' },
|
|
3919
|
+
{ pattern: /k+/g, replace: 'K' },
|
|
3920
|
+
{ pattern: /f+/g, replace: 'F' },
|
|
3921
|
+
{ pattern: /m+/g, replace: 'M' },
|
|
3922
|
+
{ pattern: /n+/g, replace: 'N' },
|
|
3923
|
+
// L/R/W3 handling
|
|
3924
|
+
{ pattern: /l3/g, replace: 'L3' },
|
|
3925
|
+
{ pattern: /r3/g, replace: 'R3' },
|
|
3926
|
+
{ pattern: /w3/g, replace: 'W3' },
|
|
3927
|
+
{ pattern: /wh3/g, replace: 'Wh3' },
|
|
3928
|
+
{ pattern: /[lrw]$/, replace: '3' },
|
|
3929
|
+
// H at start and final 3 handling
|
|
3930
|
+
{ pattern: /^h/, replace: 'A' },
|
|
3931
|
+
{ pattern: /3$/, replace: 'A' },
|
|
3932
|
+
// Remove certain letters
|
|
3933
|
+
{ pattern: /[hlrw23]/g, replace: '' }
|
|
3934
|
+
]
|
|
3935
|
+
});
|
|
3936
|
+
|
|
3719
3937
|
/**
|
|
3720
3938
|
* Cologne Phonetic Algorithm
|
|
3721
3939
|
* src/phonetic/Cologne.ts
|