cmpstr 3.0.1 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/dist/CmpStr.esm.js +257 -25
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +257 -25
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +12 -1
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +11 -1
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -2
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -1
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -1
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +1 -1
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +6 -6
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -1
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +199 -0
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -0
- package/dist/cjs/phonetic/Cologne.cjs +1 -1
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +50 -16
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -1
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/utils/DeepMerge.cjs +2 -2
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +10 -10
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -1
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -1
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +1 -1
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +1 -1
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +3 -3
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +7 -7
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +2 -2
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/{CmpStr.js → CmpStr.mjs} +36 -25
- package/dist/esm/CmpStr.mjs.map +1 -0
- package/dist/esm/{CmpStrAsync.js → CmpStrAsync.mjs} +16 -6
- package/dist/esm/CmpStrAsync.mjs.map +1 -0
- package/dist/esm/index.mjs +7 -0
- package/dist/esm/index.mjs.map +1 -0
- package/dist/esm/metric/{Cosine.js → Cosine.mjs} +4 -4
- package/dist/esm/metric/Cosine.mjs.map +1 -0
- package/dist/esm/metric/{DamerauLevenshtein.js → DamerauLevenshtein.mjs} +4 -4
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -0
- package/dist/esm/metric/{DiceSorensen.js → DiceSorensen.mjs} +4 -4
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -0
- package/dist/esm/metric/{Hamming.js → Hamming.mjs} +4 -4
- package/dist/esm/metric/Hamming.mjs.map +1 -0
- package/dist/esm/metric/{Jaccard.js → Jaccard.mjs} +4 -4
- package/dist/esm/metric/Jaccard.mjs.map +1 -0
- package/dist/esm/metric/{JaroWinkler.js → JaroWinkler.mjs} +4 -4
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -0
- package/dist/esm/metric/{LCS.js → LCS.mjs} +4 -4
- package/dist/esm/metric/LCS.mjs.map +1 -0
- package/dist/esm/metric/{Levenshtein.js → Levenshtein.mjs} +4 -4
- package/dist/esm/metric/Levenshtein.mjs.map +1 -0
- package/dist/esm/metric/{Metric.js → Metric.mjs} +10 -10
- package/dist/esm/metric/Metric.mjs.map +1 -0
- package/dist/esm/metric/{NeedlemanWunsch.js → NeedlemanWunsch.mjs} +4 -4
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -0
- package/dist/esm/metric/{SmithWaterman.js → SmithWaterman.mjs} +4 -4
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -0
- package/dist/esm/metric/{qGram.js → qGram.mjs} +4 -4
- package/dist/esm/metric/qGram.mjs.map +1 -0
- package/dist/esm/phonetic/Caverphone.mjs +201 -0
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -0
- package/dist/esm/phonetic/{Cologne.js → Cologne.mjs} +3 -3
- package/dist/esm/phonetic/Cologne.mjs.map +1 -0
- package/dist/esm/phonetic/{Metaphone.js → Metaphone.mjs} +3 -3
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -0
- package/dist/esm/phonetic/{Phonetic.js → Phonetic.mjs} +52 -21
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -0
- package/dist/esm/phonetic/{Soundex.js → Soundex.mjs} +3 -3
- package/dist/esm/phonetic/Soundex.mjs.map +1 -0
- package/dist/esm/utils/{DeepMerge.js → DeepMerge.mjs} +3 -3
- package/dist/esm/utils/DeepMerge.mjs.map +1 -0
- package/dist/esm/utils/{DiffChecker.js → DiffChecker.mjs} +11 -11
- package/dist/esm/utils/DiffChecker.mjs.map +1 -0
- package/dist/esm/utils/{Filter.js → Filter.mjs} +2 -2
- package/dist/esm/utils/Filter.mjs.map +1 -0
- package/dist/esm/utils/{HashTable.js → HashTable.mjs} +2 -2
- package/dist/esm/utils/HashTable.mjs.map +1 -0
- package/dist/esm/utils/{Normalizer.js → Normalizer.mjs} +3 -3
- package/dist/esm/utils/Normalizer.mjs.map +1 -0
- package/dist/esm/utils/{Pool.js → Pool.mjs} +2 -2
- package/dist/esm/utils/Pool.mjs.map +1 -0
- package/dist/esm/utils/{Profiler.js → Profiler.mjs} +4 -4
- package/dist/esm/utils/Profiler.mjs.map +1 -0
- package/dist/esm/utils/{Registry.js → Registry.mjs} +8 -8
- package/dist/esm/utils/Registry.mjs.map +1 -0
- package/dist/esm/utils/{TextAnalyzer.js → TextAnalyzer.mjs} +3 -3
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -0
- package/dist/types/index.d.ts +3 -2
- package/dist/types/phonetic/Caverphone.d.ts +55 -0
- package/dist/types/phonetic/Phonetic.d.ts +14 -2
- package/dist/types/phonetic/index.d.ts +1 -0
- package/dist/types/utils/Types.d.ts +12 -0
- package/package.json +15 -13
- package/dist/esm/CmpStr.js.map +0 -1
- package/dist/esm/CmpStrAsync.js.map +0 -1
- package/dist/esm/index.js +0 -7
- package/dist/esm/index.js.map +0 -1
- package/dist/esm/metric/Cosine.js.map +0 -1
- package/dist/esm/metric/DamerauLevenshtein.js.map +0 -1
- package/dist/esm/metric/DiceSorensen.js.map +0 -1
- package/dist/esm/metric/Hamming.js.map +0 -1
- package/dist/esm/metric/Jaccard.js.map +0 -1
- package/dist/esm/metric/JaroWinkler.js.map +0 -1
- package/dist/esm/metric/LCS.js.map +0 -1
- package/dist/esm/metric/Levenshtein.js.map +0 -1
- package/dist/esm/metric/Metric.js.map +0 -1
- package/dist/esm/metric/NeedlemanWunsch.js.map +0 -1
- package/dist/esm/metric/SmithWaterman.js.map +0 -1
- package/dist/esm/metric/qGram.js.map +0 -1
- package/dist/esm/phonetic/Cologne.js.map +0 -1
- package/dist/esm/phonetic/Metaphone.js.map +0 -1
- package/dist/esm/phonetic/Phonetic.js.map +0 -1
- package/dist/esm/phonetic/Soundex.js.map +0 -1
- package/dist/esm/utils/DeepMerge.js.map +0 -1
- package/dist/esm/utils/DiffChecker.js.map +0 -1
- package/dist/esm/utils/Filter.js.map +0 -1
- package/dist/esm/utils/HashTable.js.map +0 -1
- package/dist/esm/utils/Normalizer.js.map +0 -1
- package/dist/esm/utils/Pool.js.map +0 -1
- package/dist/esm/utils/Profiler.js.map +0 -1
- package/dist/esm/utils/Registry.js.map +0 -1
- package/dist/esm/utils/TextAnalyzer.js.map +0 -1
package/dist/CmpStr.umd.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* CmpStr v3.0.
|
|
2
|
+
* CmpStr v3.0.3 build-462b952-250813
|
|
3
3
|
* This is a lightweight, fast and well performing library for calculating string similarity.
|
|
4
4
|
* (c) 2023-2025 Paul Köhler @komed3 / MIT License
|
|
5
5
|
* Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
|
|
@@ -67,7 +67,7 @@
|
|
|
67
67
|
const [k, ...r] = parse(path);
|
|
68
68
|
// Throw an error if the target is defined but is not an object
|
|
69
69
|
if (t !== undefined && (typeof t !== 'object' || t === null))
|
|
70
|
-
throw Error(`
|
|
70
|
+
throw Error(`Cannot set property <${k}> of <${JSON.stringify(t)}>`);
|
|
71
71
|
// Assign the value to the specified key in the object
|
|
72
72
|
return Object.assign(t ?? (typeof k === 'number' ? [] : Object.create(null)), {
|
|
73
73
|
[k]: set(t?.[k], r.join('.'), value)
|
|
@@ -1686,7 +1686,7 @@
|
|
|
1686
1686
|
function Registry(reg, ctor) {
|
|
1687
1687
|
// Throws an error if the registry already exists
|
|
1688
1688
|
if (reg in registry || reg in factory)
|
|
1689
|
-
throw new Error(`
|
|
1689
|
+
throw new Error(`Registry <${reg}> already exists / overwriting is forbidden`);
|
|
1690
1690
|
// Create a registry object to hold class constructors
|
|
1691
1691
|
const classes = Object.create(null);
|
|
1692
1692
|
const service = {
|
|
@@ -1701,9 +1701,9 @@
|
|
|
1701
1701
|
*/
|
|
1702
1702
|
add(name, cls, update = false) {
|
|
1703
1703
|
if (!(cls.prototype instanceof ctor))
|
|
1704
|
-
throw new TypeError(`
|
|
1704
|
+
throw new TypeError(`Class must extend <${reg}>`);
|
|
1705
1705
|
if (!update && name in classes)
|
|
1706
|
-
throw new Error(`
|
|
1706
|
+
throw new Error(`Entry <${name}> already exists / use <update=true> to overwrite`);
|
|
1707
1707
|
classes[name] = cls;
|
|
1708
1708
|
},
|
|
1709
1709
|
/**
|
|
@@ -1734,7 +1734,7 @@
|
|
|
1734
1734
|
*/
|
|
1735
1735
|
get(name) {
|
|
1736
1736
|
if (!(name in classes))
|
|
1737
|
-
throw new Error(`
|
|
1737
|
+
throw new Error(`Class <${name}> not registered for <${reg}>`);
|
|
1738
1738
|
return classes[name];
|
|
1739
1739
|
}
|
|
1740
1740
|
};
|
|
@@ -1755,7 +1755,7 @@
|
|
|
1755
1755
|
*/
|
|
1756
1756
|
function resolveCls(reg, cls) {
|
|
1757
1757
|
if (!(reg in registry))
|
|
1758
|
-
throw new ReferenceError(`
|
|
1758
|
+
throw new ReferenceError(`Registry <${reg}> does not exist`);
|
|
1759
1759
|
return (typeof cls === 'string' ? registry[reg]?.get(cls) : cls);
|
|
1760
1760
|
}
|
|
1761
1761
|
/**
|
|
@@ -1773,7 +1773,9 @@
|
|
|
1773
1773
|
return new cls(...args);
|
|
1774
1774
|
}
|
|
1775
1775
|
catch (err) {
|
|
1776
|
-
throw new Error(`
|
|
1776
|
+
throw new Error(`Cannot instantiate class <${cls}>`, {
|
|
1777
|
+
cause: err
|
|
1778
|
+
});
|
|
1777
1779
|
}
|
|
1778
1780
|
}
|
|
1779
1781
|
|
|
@@ -1871,7 +1873,7 @@
|
|
|
1871
1873
|
this.b = Array.isArray(b) ? b : [b];
|
|
1872
1874
|
// Validate inputs: ensure they are not empty
|
|
1873
1875
|
if (this.a.length === 0 || this.b.length === 0)
|
|
1874
|
-
throw new Error(`
|
|
1876
|
+
throw new Error(`Inputs <a> and <b> must not be empty`);
|
|
1875
1877
|
// Set options
|
|
1876
1878
|
this.options = opt;
|
|
1877
1879
|
this.symmetric = symmetric;
|
|
@@ -1908,7 +1910,7 @@
|
|
|
1908
1910
|
* @throws {Error} - If not overridden in a subclass
|
|
1909
1911
|
*/
|
|
1910
1912
|
compute(a, b, m, n, maxLen) {
|
|
1911
|
-
throw new Error(`
|
|
1913
|
+
throw new Error(`Method compute() must be overridden in a subclass`);
|
|
1912
1914
|
}
|
|
1913
1915
|
/**
|
|
1914
1916
|
* Run the metric computation for single inputs (two strings).
|
|
@@ -2065,7 +2067,7 @@
|
|
|
2065
2067
|
*/
|
|
2066
2068
|
isPairwise(safe = false) {
|
|
2067
2069
|
return this.isBatch() && this.a.length === this.b.length ? true : !safe && (() => {
|
|
2068
|
-
throw new Error(`
|
|
2070
|
+
throw new Error(`Mode <pairwise> requires arrays of equal length`);
|
|
2069
2071
|
})();
|
|
2070
2072
|
}
|
|
2071
2073
|
/**
|
|
@@ -2126,7 +2128,7 @@
|
|
|
2126
2128
|
this.runPairwise();
|
|
2127
2129
|
break;
|
|
2128
2130
|
// Unsupported mode
|
|
2129
|
-
default: throw new Error(`
|
|
2131
|
+
default: throw new Error(`Unsupported mode <${mode}>`);
|
|
2130
2132
|
}
|
|
2131
2133
|
}
|
|
2132
2134
|
/**
|
|
@@ -2161,7 +2163,7 @@
|
|
|
2161
2163
|
await this.runPairwiseAsync();
|
|
2162
2164
|
break;
|
|
2163
2165
|
// Unsupported mode
|
|
2164
|
-
default: throw new Error(`
|
|
2166
|
+
default: throw new Error(`Unsupported async mode <${mode}>`);
|
|
2165
2167
|
}
|
|
2166
2168
|
}
|
|
2167
2169
|
/**
|
|
@@ -2704,7 +2706,7 @@
|
|
|
2704
2706
|
}
|
|
2705
2707
|
// Standard: Error for unequal length
|
|
2706
2708
|
else
|
|
2707
|
-
throw new Error(`
|
|
2709
|
+
throw new Error(`Strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
|
|
2708
2710
|
`use option.pad for automatic adjustment`);
|
|
2709
2711
|
}
|
|
2710
2712
|
// Calculate the Hamming distance
|
|
@@ -3353,8 +3355,8 @@
|
|
|
3353
3355
|
* pose a risk of infringing upon existing trademarks due to their pronunciation.
|
|
3354
3356
|
*
|
|
3355
3357
|
* This module provides an abstract class for generating phonetic indices based
|
|
3356
|
-
* on mappings and rules. It allows for the implementation of various
|
|
3357
|
-
* algorithms by extending the abstract class.
|
|
3358
|
+
* on mappings, patterns and rules. It allows for the implementation of various
|
|
3359
|
+
* phonetic algorithms by extending the abstract class.
|
|
3358
3360
|
*
|
|
3359
3361
|
* @module Phonetic
|
|
3360
3362
|
* @author Paul Köhler (komed3)
|
|
@@ -3394,22 +3396,55 @@
|
|
|
3394
3396
|
* Constructor for the Phonetic class.
|
|
3395
3397
|
*
|
|
3396
3398
|
* Initializes the phonetic algorithm with the specified options and mapping.
|
|
3399
|
+
* Options hierarchy: User input > mapping options > default
|
|
3397
3400
|
*
|
|
3398
3401
|
* @param {string} algo - The name of the algorithm (e.g. 'soundex')
|
|
3399
3402
|
* @param {PhoneticOptions} [opt] - Options for the phonetic algorithm
|
|
3400
3403
|
* @throws {Error} - If the requested mapping is not declared
|
|
3401
3404
|
*/
|
|
3402
3405
|
constructor(algo, opt = {}) {
|
|
3403
|
-
//
|
|
3404
|
-
|
|
3405
|
-
//
|
|
3406
|
-
const
|
|
3406
|
+
// Get the phonetic default options
|
|
3407
|
+
const defaults = this.constructor.default ?? {};
|
|
3408
|
+
// Determine phonetic map ID from options or use defaults
|
|
3409
|
+
const mapId = opt.map ?? defaults.map;
|
|
3410
|
+
// If no mapping is specified, throw an error
|
|
3411
|
+
if (!mapId)
|
|
3412
|
+
throw new Error(`No mapping specified for phonetic algorithm`);
|
|
3413
|
+
// Get the mapping based on the determined map ID
|
|
3414
|
+
const map = PhoneticMappingRegistry.get(algo, mapId);
|
|
3407
3415
|
// If the mapping is not defined, throw an error
|
|
3408
3416
|
if (map === undefined)
|
|
3409
|
-
throw new Error(`
|
|
3417
|
+
throw new Error(`Requested mapping <${mapId}> is not declared`);
|
|
3418
|
+
// Set the options by merging the defaults with the mapping options and then the provided ones
|
|
3419
|
+
this.options = merge(merge(defaults, map.options ?? {}), opt);
|
|
3420
|
+
// Set the algorithm name and mapping
|
|
3410
3421
|
this.algo = algo;
|
|
3411
3422
|
this.map = map;
|
|
3412
3423
|
}
|
|
3424
|
+
/**
|
|
3425
|
+
* Applies patterns to a word based on the phonetic map.
|
|
3426
|
+
*
|
|
3427
|
+
* This method processes the word by applying all defined patterns in the
|
|
3428
|
+
* phonetic map. It replaces occurrences of specified patterns with their
|
|
3429
|
+
* corresponding replacements.
|
|
3430
|
+
*
|
|
3431
|
+
* @param {string} word - The input word to be processed
|
|
3432
|
+
* @returns {string} - The modified word after applying all patterns
|
|
3433
|
+
*/
|
|
3434
|
+
applyPattern(word) {
|
|
3435
|
+
const { patterns = [] } = this.map;
|
|
3436
|
+
// If no patterns are provided, return the input
|
|
3437
|
+
if (!patterns || !patterns.length)
|
|
3438
|
+
return word;
|
|
3439
|
+
// Iterate over the patterns and replace all matches
|
|
3440
|
+
for (const { pattern, replace, all = false } of patterns) {
|
|
3441
|
+
// Search for the pattern in the word and replace it
|
|
3442
|
+
// Use replaceAll if 'all' is true, otherwise use replace
|
|
3443
|
+
word = word[all ? 'replaceAll' : 'replace'](pattern, replace);
|
|
3444
|
+
}
|
|
3445
|
+
// Return the modified word after applying all patterns
|
|
3446
|
+
return word;
|
|
3447
|
+
}
|
|
3413
3448
|
/**
|
|
3414
3449
|
* Applies phonetic rules to a character in a word context.
|
|
3415
3450
|
*
|
|
@@ -3487,6 +3522,9 @@
|
|
|
3487
3522
|
*/
|
|
3488
3523
|
encode(word) {
|
|
3489
3524
|
const { map = {}, ignore = [] } = this.map;
|
|
3525
|
+
// Apply patterns to the word before processing
|
|
3526
|
+
// This allows for pre-processing of the word based on defined patterns
|
|
3527
|
+
word = this.applyPattern(word);
|
|
3490
3528
|
// Get the characters of the word and its length
|
|
3491
3529
|
const chars = this.word2Chars(word);
|
|
3492
3530
|
const charLen = chars.length;
|
|
@@ -3523,11 +3561,11 @@
|
|
|
3523
3561
|
* @returns {string|undefined} - The phonetic code or undefined if no code applies
|
|
3524
3562
|
*/
|
|
3525
3563
|
mapChar(char, i, chars, charLen, lastCode, map) {
|
|
3526
|
-
const { dedupe = true } = this.options;
|
|
3564
|
+
const { dedupe = true, fallback = undefined } = this.options;
|
|
3527
3565
|
// Apply phonetic rules to the character
|
|
3528
3566
|
// If no rules apply, use the mapping
|
|
3529
|
-
// If the character is not in the mapping, return
|
|
3530
|
-
const c = this.applyRules(char, i, chars, charLen) ?? map[char] ??
|
|
3567
|
+
// If the character is not in the mapping, return the fallback
|
|
3568
|
+
const c = this.applyRules(char, i, chars, charLen) ?? map[char] ?? fallback;
|
|
3531
3569
|
// De-duplicate the code if necessary
|
|
3532
3570
|
return dedupe && c === lastCode ? undefined : c;
|
|
3533
3571
|
}
|
|
@@ -3680,7 +3718,7 @@
|
|
|
3680
3718
|
add(algo, id, map, update = false) {
|
|
3681
3719
|
const mappings = maps(algo);
|
|
3682
3720
|
if (!update && id in mappings)
|
|
3683
|
-
throw new Error(`
|
|
3721
|
+
throw new Error(`Entry <${id}> already exists / use <update=true> to overwrite`);
|
|
3684
3722
|
mappings[id] = map;
|
|
3685
3723
|
},
|
|
3686
3724
|
/**
|
|
@@ -3716,6 +3754,188 @@
|
|
|
3716
3754
|
};
|
|
3717
3755
|
})();
|
|
3718
3756
|
|
|
3757
|
+
/**
|
|
3758
|
+
* Caverphone Phonetic Algorithm
|
|
3759
|
+
* src/phonetic/Caverphone.ts
|
|
3760
|
+
*
|
|
3761
|
+
* @see https://en.wikipedia.org/wiki/Caverphone
|
|
3762
|
+
*
|
|
3763
|
+
* This module implements the Caverphone phonetic algorithm, which is designed
|
|
3764
|
+
* to encode words into a phonetic representation. The Caverphone algorithm is
|
|
3765
|
+
* used primarily in New Zealand and was developed to assist in the indexing of
|
|
3766
|
+
* names in genealogical databases.
|
|
3767
|
+
*
|
|
3768
|
+
* It converts words into a standardized phonetic code, allowing for variations
|
|
3769
|
+
* in spelling and pronunciation to be matched.
|
|
3770
|
+
*
|
|
3771
|
+
* @module Phonetic/Caverphone
|
|
3772
|
+
* @author Paul Köhler (komed3)
|
|
3773
|
+
* @license MIT
|
|
3774
|
+
*/
|
|
3775
|
+
/**
|
|
3776
|
+
* Caverphone class extends the Phonetic class to implement the Caverphone phonetic algorithm.
|
|
3777
|
+
*/
|
|
3778
|
+
class Caverphone extends Phonetic {
|
|
3779
|
+
// Default options for the Caverphone phonetic algorithm
|
|
3780
|
+
static default = {
|
|
3781
|
+
map: 'en2', delimiter: ' ', length: -1, pad: '', dedupe: false
|
|
3782
|
+
};
|
|
3783
|
+
/**
|
|
3784
|
+
* Constructor for the Caverphone class.
|
|
3785
|
+
*
|
|
3786
|
+
* Initializes the Caverphone phonetic algorithm with the mapping and options.
|
|
3787
|
+
*
|
|
3788
|
+
* @param {PhoneticOptions} [opt] - Options for the Caverphone phonetic algorithm
|
|
3789
|
+
*/
|
|
3790
|
+
constructor(opt = {}) { super('caverphone', opt); }
|
|
3791
|
+
/**
|
|
3792
|
+
* Generates the Caverphone code for a given word.
|
|
3793
|
+
*
|
|
3794
|
+
* @param {string} word - The input word to be converted into a Caverphone code
|
|
3795
|
+
* @returns {string} - The generated Caverphone code
|
|
3796
|
+
*/
|
|
3797
|
+
encode(word) {
|
|
3798
|
+
// Remove anything not A-Z and convert to lowercase
|
|
3799
|
+
word = word.replace(/[^A-Z]/gi, '').toLowerCase();
|
|
3800
|
+
// Use the base implementation for rule/mapping application
|
|
3801
|
+
return super.encode(word);
|
|
3802
|
+
}
|
|
3803
|
+
/**
|
|
3804
|
+
* Overrides the mapChar method to skip character mapping.
|
|
3805
|
+
*
|
|
3806
|
+
* @param {string} char - The character to be mapped
|
|
3807
|
+
* @returns {string} - The mapped character
|
|
3808
|
+
*/
|
|
3809
|
+
mapChar(char) { return char; }
|
|
3810
|
+
/**
|
|
3811
|
+
* Adjusts the phonetic code to uppercase.
|
|
3812
|
+
*
|
|
3813
|
+
* @param {string} code - The phonetic code to adjust
|
|
3814
|
+
* @returns {string} - The adjusted phonetic code
|
|
3815
|
+
*/
|
|
3816
|
+
adjustCode(code) { return code.toUpperCase(); }
|
|
3817
|
+
}
|
|
3818
|
+
// Register the Caverphone algorithm in the phonetic registry
|
|
3819
|
+
PhoneticRegistry.add('caverphone', Caverphone);
|
|
3820
|
+
// Register the Caverphone 1.0 phonetic mapping for English
|
|
3821
|
+
PhoneticMappingRegistry.add('caverphone', 'en1', {
|
|
3822
|
+
options: { length: 6, pad: '1' },
|
|
3823
|
+
map: {},
|
|
3824
|
+
patterns: [
|
|
3825
|
+
// Special word-initial replacements
|
|
3826
|
+
{ pattern: /^(c|r|t|en)ough/, replace: '$1ou2f' },
|
|
3827
|
+
{ pattern: /^gn/, replace: '2n' },
|
|
3828
|
+
// Special word-final replacement
|
|
3829
|
+
{ pattern: /mb$/, replace: 'm2' },
|
|
3830
|
+
// Character group replacements
|
|
3831
|
+
{ pattern: /cq/g, replace: '2q' },
|
|
3832
|
+
{ pattern: /c(e|i|y)/g, replace: 's$1' },
|
|
3833
|
+
{ pattern: /tch/g, replace: '2ch' },
|
|
3834
|
+
{ pattern: /[cqx]/g, replace: 'k' },
|
|
3835
|
+
{ pattern: /v/g, replace: 'f' },
|
|
3836
|
+
{ pattern: /dg/g, replace: '2g' },
|
|
3837
|
+
{ pattern: /ti(a|o)/g, replace: 'si$1' },
|
|
3838
|
+
{ pattern: /d/g, replace: 't' },
|
|
3839
|
+
{ pattern: /ph/g, replace: 'fh' },
|
|
3840
|
+
{ pattern: /b/g, replace: 'p' },
|
|
3841
|
+
{ pattern: /sh/g, replace: 's2' },
|
|
3842
|
+
{ pattern: /z/g, replace: 's' },
|
|
3843
|
+
// Vowel handling
|
|
3844
|
+
{ pattern: /^[aeiou]/, replace: 'A' },
|
|
3845
|
+
{ pattern: /[aeiou]/g, replace: '3' },
|
|
3846
|
+
// Special gh handling
|
|
3847
|
+
{ pattern: /3gh3/g, replace: '3kh3' },
|
|
3848
|
+
{ pattern: /gh/g, replace: '22' },
|
|
3849
|
+
// Single character replacements
|
|
3850
|
+
{ pattern: /g/g, replace: 'k' },
|
|
3851
|
+
// Collapse repeated consonants
|
|
3852
|
+
{ pattern: /s+/g, replace: 'S' },
|
|
3853
|
+
{ pattern: /t+/g, replace: 'T' },
|
|
3854
|
+
{ pattern: /p+/g, replace: 'P' },
|
|
3855
|
+
{ pattern: /k+/g, replace: 'K' },
|
|
3856
|
+
{ pattern: /f+/g, replace: 'F' },
|
|
3857
|
+
{ pattern: /m+/g, replace: 'M' },
|
|
3858
|
+
{ pattern: /n+/g, replace: 'N' },
|
|
3859
|
+
// Y and other single-letter handling
|
|
3860
|
+
{ pattern: /j/g, replace: 'y' },
|
|
3861
|
+
// L/R/W/Y3 handling
|
|
3862
|
+
{ pattern: /l3/g, replace: 'L3' },
|
|
3863
|
+
{ pattern: /r3/g, replace: 'R3' },
|
|
3864
|
+
{ pattern: /w3/g, replace: 'W3' },
|
|
3865
|
+
{ pattern: /y3/g, replace: 'Y3' },
|
|
3866
|
+
// L/R/W followed by y
|
|
3867
|
+
{ pattern: /ly/g, replace: 'Ly' },
|
|
3868
|
+
{ pattern: /ry/g, replace: 'Ry' },
|
|
3869
|
+
{ pattern: /wy/g, replace: 'Wy' },
|
|
3870
|
+
// WH handling
|
|
3871
|
+
{ pattern: /wh3/g, replace: 'Wh3' },
|
|
3872
|
+
{ pattern: /why/g, replace: 'Why' },
|
|
3873
|
+
// H at start
|
|
3874
|
+
{ pattern: /^h/, replace: 'A' },
|
|
3875
|
+
// Remove certain letters
|
|
3876
|
+
{ pattern: /[hlrwy23]/g, replace: '' }
|
|
3877
|
+
]
|
|
3878
|
+
});
|
|
3879
|
+
// Register the Caverphone 2.0 phonetic mapping for English
|
|
3880
|
+
PhoneticMappingRegistry.add('caverphone', 'en2', {
|
|
3881
|
+
options: { length: 10, pad: '1' },
|
|
3882
|
+
map: {},
|
|
3883
|
+
patterns: [
|
|
3884
|
+
// Remove trailing 'e'
|
|
3885
|
+
{ pattern: /e$/, replace: '' },
|
|
3886
|
+
// Special word-initial replacements
|
|
3887
|
+
{ pattern: /^(c|r|t|en|tr)ough/, replace: '$1ou2f' },
|
|
3888
|
+
{ pattern: /^gn/, replace: '2n' },
|
|
3889
|
+
// Special word-final replacement
|
|
3890
|
+
{ pattern: /mb$/, replace: 'm2' },
|
|
3891
|
+
// Character group replacements
|
|
3892
|
+
{ pattern: /cq/g, replace: '2q' },
|
|
3893
|
+
{ pattern: /c(e|i|y)/g, replace: 's$1' },
|
|
3894
|
+
{ pattern: /tch/g, replace: '2ch' },
|
|
3895
|
+
{ pattern: /[cqx]/g, replace: 'k' },
|
|
3896
|
+
{ pattern: /v/g, replace: 'f' },
|
|
3897
|
+
{ pattern: /dg/g, replace: '2g' },
|
|
3898
|
+
{ pattern: /ti(a|o)/g, replace: 'si$1' },
|
|
3899
|
+
{ pattern: /d/g, replace: 't' },
|
|
3900
|
+
{ pattern: /ph/g, replace: 'fh' },
|
|
3901
|
+
{ pattern: /b/g, replace: 'p' },
|
|
3902
|
+
{ pattern: /sh/g, replace: 's2' },
|
|
3903
|
+
{ pattern: /z/g, replace: 's' },
|
|
3904
|
+
// Vowel handling
|
|
3905
|
+
{ pattern: /^[aeiou]/, replace: 'A' },
|
|
3906
|
+
{ pattern: /[aeiou]/g, replace: '3' },
|
|
3907
|
+
// Y handling
|
|
3908
|
+
{ pattern: /j/g, replace: 'y' },
|
|
3909
|
+
{ pattern: /^y3/, replace: 'Y3' },
|
|
3910
|
+
{ pattern: /^y/, replace: 'A' },
|
|
3911
|
+
{ pattern: /y/g, replace: '3' },
|
|
3912
|
+
// Special gh handling
|
|
3913
|
+
{ pattern: /3gh3/g, replace: '3kh3' },
|
|
3914
|
+
{ pattern: /gh/g, replace: '22' },
|
|
3915
|
+
// Single character replacements
|
|
3916
|
+
{ pattern: /g/g, replace: 'k' },
|
|
3917
|
+
// Collapse repeated consonants
|
|
3918
|
+
{ pattern: /s+/g, replace: 'S' },
|
|
3919
|
+
{ pattern: /t+/g, replace: 'T' },
|
|
3920
|
+
{ pattern: /p+/g, replace: 'P' },
|
|
3921
|
+
{ pattern: /k+/g, replace: 'K' },
|
|
3922
|
+
{ pattern: /f+/g, replace: 'F' },
|
|
3923
|
+
{ pattern: /m+/g, replace: 'M' },
|
|
3924
|
+
{ pattern: /n+/g, replace: 'N' },
|
|
3925
|
+
// L/R/W3 handling
|
|
3926
|
+
{ pattern: /l3/g, replace: 'L3' },
|
|
3927
|
+
{ pattern: /r3/g, replace: 'R3' },
|
|
3928
|
+
{ pattern: /w3/g, replace: 'W3' },
|
|
3929
|
+
{ pattern: /wh3/g, replace: 'Wh3' },
|
|
3930
|
+
{ pattern: /[lrw]$/, replace: '3' },
|
|
3931
|
+
// H at start and final 3 handling
|
|
3932
|
+
{ pattern: /^h/, replace: 'A' },
|
|
3933
|
+
{ pattern: /3$/, replace: 'A' },
|
|
3934
|
+
// Remove certain letters
|
|
3935
|
+
{ pattern: /[hlrw23]/g, replace: '' }
|
|
3936
|
+
]
|
|
3937
|
+
});
|
|
3938
|
+
|
|
3719
3939
|
/**
|
|
3720
3940
|
* Cologne Phonetic Algorithm
|
|
3721
3941
|
* src/phonetic/Cologne.ts
|
|
@@ -4264,6 +4484,12 @@
|
|
|
4264
4484
|
// Prepare the input
|
|
4265
4485
|
const A = skip ? a : this.prepare(a, resolved);
|
|
4266
4486
|
const B = skip ? b : this.prepare(b, resolved);
|
|
4487
|
+
// If either input is empty and safeEmpty is enabled, return an empty array
|
|
4488
|
+
if (resolved.safeEmpty && ((Array.isArray(A) && A.length === 0) ||
|
|
4489
|
+
(Array.isArray(B) && B.length === 0) ||
|
|
4490
|
+
A === '' || B === '')) {
|
|
4491
|
+
return [];
|
|
4492
|
+
}
|
|
4267
4493
|
// Get the metric class
|
|
4268
4494
|
const metric = factory.metric(resolved.metric, A, B, resolved.opt);
|
|
4269
4495
|
// Pass the original inputs to the metric
|
|
@@ -4694,6 +4920,12 @@
|
|
|
4694
4920
|
// Prepare the input
|
|
4695
4921
|
const A = skip ? a : await this.prepareAsync(a, resolved);
|
|
4696
4922
|
const B = skip ? b : await this.prepareAsync(b, resolved);
|
|
4923
|
+
// If either input is empty and safeEmpty is enabled, return an empty array
|
|
4924
|
+
if (resolved.safeEmpty && ((Array.isArray(A) && A.length === 0) ||
|
|
4925
|
+
(Array.isArray(B) && B.length === 0) ||
|
|
4926
|
+
A === '' || B === '')) {
|
|
4927
|
+
return [];
|
|
4928
|
+
}
|
|
4697
4929
|
// Get the metric class
|
|
4698
4930
|
const metric = factory.metric(resolved.metric, A, B, resolved.opt);
|
|
4699
4931
|
// Pass the original inputs to the metric
|