cmpstr 3.0.1 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -1
- package/dist/CmpStr.esm.js +257 -25
- package/dist/CmpStr.esm.js.map +1 -1
- package/dist/CmpStr.esm.min.js +2 -2
- package/dist/CmpStr.esm.min.js.map +1 -1
- package/dist/CmpStr.umd.js +257 -25
- package/dist/CmpStr.umd.js.map +1 -1
- package/dist/CmpStr.umd.min.js +2 -2
- package/dist/CmpStr.umd.min.js.map +1 -1
- package/dist/cjs/CmpStr.cjs +12 -1
- package/dist/cjs/CmpStr.cjs.map +1 -1
- package/dist/cjs/CmpStrAsync.cjs +11 -1
- package/dist/cjs/CmpStrAsync.cjs.map +1 -1
- package/dist/cjs/index.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs +1 -1
- package/dist/cjs/metric/Cosine.cjs.map +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs +1 -1
- package/dist/cjs/metric/DamerauLevenshtein.cjs.map +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs +1 -1
- package/dist/cjs/metric/DiceSorensen.cjs.map +1 -1
- package/dist/cjs/metric/Hamming.cjs +2 -2
- package/dist/cjs/metric/Hamming.cjs.map +1 -1
- package/dist/cjs/metric/Jaccard.cjs +1 -1
- package/dist/cjs/metric/Jaccard.cjs.map +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs +1 -1
- package/dist/cjs/metric/JaroWinkler.cjs.map +1 -1
- package/dist/cjs/metric/LCS.cjs +1 -1
- package/dist/cjs/metric/LCS.cjs.map +1 -1
- package/dist/cjs/metric/Levenshtein.cjs +1 -1
- package/dist/cjs/metric/Levenshtein.cjs.map +1 -1
- package/dist/cjs/metric/Metric.cjs +6 -6
- package/dist/cjs/metric/Metric.cjs.map +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs +1 -1
- package/dist/cjs/metric/NeedlemanWunsch.cjs.map +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs +1 -1
- package/dist/cjs/metric/SmithWaterman.cjs.map +1 -1
- package/dist/cjs/metric/qGram.cjs +1 -1
- package/dist/cjs/metric/qGram.cjs.map +1 -1
- package/dist/cjs/phonetic/Caverphone.cjs +199 -0
- package/dist/cjs/phonetic/Caverphone.cjs.map +1 -0
- package/dist/cjs/phonetic/Cologne.cjs +1 -1
- package/dist/cjs/phonetic/Cologne.cjs.map +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs +1 -1
- package/dist/cjs/phonetic/Metaphone.cjs.map +1 -1
- package/dist/cjs/phonetic/Phonetic.cjs +50 -16
- package/dist/cjs/phonetic/Phonetic.cjs.map +1 -1
- package/dist/cjs/phonetic/Soundex.cjs +1 -1
- package/dist/cjs/phonetic/Soundex.cjs.map +1 -1
- package/dist/cjs/utils/DeepMerge.cjs +2 -2
- package/dist/cjs/utils/DeepMerge.cjs.map +1 -1
- package/dist/cjs/utils/DiffChecker.cjs +10 -10
- package/dist/cjs/utils/DiffChecker.cjs.map +1 -1
- package/dist/cjs/utils/Filter.cjs +1 -1
- package/dist/cjs/utils/Filter.cjs.map +1 -1
- package/dist/cjs/utils/HashTable.cjs +1 -1
- package/dist/cjs/utils/HashTable.cjs.map +1 -1
- package/dist/cjs/utils/Normalizer.cjs +1 -1
- package/dist/cjs/utils/Normalizer.cjs.map +1 -1
- package/dist/cjs/utils/Pool.cjs +1 -1
- package/dist/cjs/utils/Pool.cjs.map +1 -1
- package/dist/cjs/utils/Profiler.cjs +3 -3
- package/dist/cjs/utils/Profiler.cjs.map +1 -1
- package/dist/cjs/utils/Registry.cjs +7 -7
- package/dist/cjs/utils/Registry.cjs.map +1 -1
- package/dist/cjs/utils/TextAnalyzer.cjs +2 -2
- package/dist/cjs/utils/TextAnalyzer.cjs.map +1 -1
- package/dist/esm/{CmpStr.js → CmpStr.mjs} +36 -25
- package/dist/esm/CmpStr.mjs.map +1 -0
- package/dist/esm/{CmpStrAsync.js → CmpStrAsync.mjs} +16 -6
- package/dist/esm/CmpStrAsync.mjs.map +1 -0
- package/dist/esm/index.mjs +7 -0
- package/dist/esm/index.mjs.map +1 -0
- package/dist/esm/metric/{Cosine.js → Cosine.mjs} +4 -4
- package/dist/esm/metric/Cosine.mjs.map +1 -0
- package/dist/esm/metric/{DamerauLevenshtein.js → DamerauLevenshtein.mjs} +4 -4
- package/dist/esm/metric/DamerauLevenshtein.mjs.map +1 -0
- package/dist/esm/metric/{DiceSorensen.js → DiceSorensen.mjs} +4 -4
- package/dist/esm/metric/DiceSorensen.mjs.map +1 -0
- package/dist/esm/metric/{Hamming.js → Hamming.mjs} +4 -4
- package/dist/esm/metric/Hamming.mjs.map +1 -0
- package/dist/esm/metric/{Jaccard.js → Jaccard.mjs} +4 -4
- package/dist/esm/metric/Jaccard.mjs.map +1 -0
- package/dist/esm/metric/{JaroWinkler.js → JaroWinkler.mjs} +4 -4
- package/dist/esm/metric/JaroWinkler.mjs.map +1 -0
- package/dist/esm/metric/{LCS.js → LCS.mjs} +4 -4
- package/dist/esm/metric/LCS.mjs.map +1 -0
- package/dist/esm/metric/{Levenshtein.js → Levenshtein.mjs} +4 -4
- package/dist/esm/metric/Levenshtein.mjs.map +1 -0
- package/dist/esm/metric/{Metric.js → Metric.mjs} +10 -10
- package/dist/esm/metric/Metric.mjs.map +1 -0
- package/dist/esm/metric/{NeedlemanWunsch.js → NeedlemanWunsch.mjs} +4 -4
- package/dist/esm/metric/NeedlemanWunsch.mjs.map +1 -0
- package/dist/esm/metric/{SmithWaterman.js → SmithWaterman.mjs} +4 -4
- package/dist/esm/metric/SmithWaterman.mjs.map +1 -0
- package/dist/esm/metric/{qGram.js → qGram.mjs} +4 -4
- package/dist/esm/metric/qGram.mjs.map +1 -0
- package/dist/esm/phonetic/Caverphone.mjs +201 -0
- package/dist/esm/phonetic/Caverphone.mjs.map +1 -0
- package/dist/esm/phonetic/{Cologne.js → Cologne.mjs} +3 -3
- package/dist/esm/phonetic/Cologne.mjs.map +1 -0
- package/dist/esm/phonetic/{Metaphone.js → Metaphone.mjs} +3 -3
- package/dist/esm/phonetic/Metaphone.mjs.map +1 -0
- package/dist/esm/phonetic/{Phonetic.js → Phonetic.mjs} +52 -21
- package/dist/esm/phonetic/Phonetic.mjs.map +1 -0
- package/dist/esm/phonetic/{Soundex.js → Soundex.mjs} +3 -3
- package/dist/esm/phonetic/Soundex.mjs.map +1 -0
- package/dist/esm/utils/{DeepMerge.js → DeepMerge.mjs} +3 -3
- package/dist/esm/utils/DeepMerge.mjs.map +1 -0
- package/dist/esm/utils/{DiffChecker.js → DiffChecker.mjs} +11 -11
- package/dist/esm/utils/DiffChecker.mjs.map +1 -0
- package/dist/esm/utils/{Filter.js → Filter.mjs} +2 -2
- package/dist/esm/utils/Filter.mjs.map +1 -0
- package/dist/esm/utils/{HashTable.js → HashTable.mjs} +2 -2
- package/dist/esm/utils/HashTable.mjs.map +1 -0
- package/dist/esm/utils/{Normalizer.js → Normalizer.mjs} +3 -3
- package/dist/esm/utils/Normalizer.mjs.map +1 -0
- package/dist/esm/utils/{Pool.js → Pool.mjs} +2 -2
- package/dist/esm/utils/Pool.mjs.map +1 -0
- package/dist/esm/utils/{Profiler.js → Profiler.mjs} +4 -4
- package/dist/esm/utils/Profiler.mjs.map +1 -0
- package/dist/esm/utils/{Registry.js → Registry.mjs} +8 -8
- package/dist/esm/utils/Registry.mjs.map +1 -0
- package/dist/esm/utils/{TextAnalyzer.js → TextAnalyzer.mjs} +3 -3
- package/dist/esm/utils/TextAnalyzer.mjs.map +1 -0
- package/dist/types/index.d.ts +3 -2
- package/dist/types/phonetic/Caverphone.d.ts +55 -0
- package/dist/types/phonetic/Phonetic.d.ts +14 -2
- package/dist/types/phonetic/index.d.ts +1 -0
- package/dist/types/utils/Types.d.ts +12 -0
- package/package.json +15 -13
- package/dist/esm/CmpStr.js.map +0 -1
- package/dist/esm/CmpStrAsync.js.map +0 -1
- package/dist/esm/index.js +0 -7
- package/dist/esm/index.js.map +0 -1
- package/dist/esm/metric/Cosine.js.map +0 -1
- package/dist/esm/metric/DamerauLevenshtein.js.map +0 -1
- package/dist/esm/metric/DiceSorensen.js.map +0 -1
- package/dist/esm/metric/Hamming.js.map +0 -1
- package/dist/esm/metric/Jaccard.js.map +0 -1
- package/dist/esm/metric/JaroWinkler.js.map +0 -1
- package/dist/esm/metric/LCS.js.map +0 -1
- package/dist/esm/metric/Levenshtein.js.map +0 -1
- package/dist/esm/metric/Metric.js.map +0 -1
- package/dist/esm/metric/NeedlemanWunsch.js.map +0 -1
- package/dist/esm/metric/SmithWaterman.js.map +0 -1
- package/dist/esm/metric/qGram.js.map +0 -1
- package/dist/esm/phonetic/Cologne.js.map +0 -1
- package/dist/esm/phonetic/Metaphone.js.map +0 -1
- package/dist/esm/phonetic/Phonetic.js.map +0 -1
- package/dist/esm/phonetic/Soundex.js.map +0 -1
- package/dist/esm/utils/DeepMerge.js.map +0 -1
- package/dist/esm/utils/DiffChecker.js.map +0 -1
- package/dist/esm/utils/Filter.js.map +0 -1
- package/dist/esm/utils/HashTable.js.map +0 -1
- package/dist/esm/utils/Normalizer.js.map +0 -1
- package/dist/esm/utils/Pool.js.map +0 -1
- package/dist/esm/utils/Profiler.js.map +0 -1
- package/dist/esm/utils/Registry.js.map +0 -1
- package/dist/esm/utils/TextAnalyzer.js.map +0 -1
package/README.md
CHANGED
|
@@ -68,8 +68,14 @@ console.log( result );
|
|
|
68
68
|
// [ 'Meyer', 'Meier' ]
|
|
69
69
|
```
|
|
70
70
|
|
|
71
|
+
_Try with [OneCompiler](https://onecompiler.com/nodejs/43qr6trny)._
|
|
72
|
+
|
|
73
|
+
## CLI Tool
|
|
74
|
+
|
|
75
|
+
Try out or use CmpStr on the terminal. Install the **[cmpstr-cli](https://npmjs.com/package/cmpstr-cli)** package and use many features of CmpStr directly on the console via the cmpstr command. Many options and parameters also make the command suitable for scripts and automatic processing.
|
|
76
|
+
|
|
71
77
|
## Documentation
|
|
72
78
|
|
|
73
79
|
The full documentation, API reference and advanced usage examples are available in the [GitHub Wiki](https://github.com/komed3/cmpstr/wiki).
|
|
74
80
|
|
|
75
|
-
**LICENSE MIT © 2023-2025 PAUL KÖHLER (KOMED3)**
|
|
81
|
+
**LICENSE MIT © 2023-2025 PAUL KÖHLER (KOMED3)**
|
package/dist/CmpStr.esm.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* CmpStr v3.0.
|
|
2
|
+
* CmpStr v3.0.3 build-462b952-250813
|
|
3
3
|
* This is a lightweight, fast and well performing library for calculating string similarity.
|
|
4
4
|
* (c) 2023-2025 Paul Köhler @komed3 / MIT License
|
|
5
5
|
* Visit https://github.com/komed3/cmpstr and https://npmjs.org/package/cmpstr
|
|
@@ -61,7 +61,7 @@ function set(t, path, value) {
|
|
|
61
61
|
const [k, ...r] = parse(path);
|
|
62
62
|
// Throw an error if the target is defined but is not a non-null object
|
|
63
63
|
if (t !== undefined && (typeof t !== 'object' || t === null))
|
|
64
|
-
throw Error(`
|
|
64
|
+
throw Error(`Cannot set property <${k}> of <${JSON.stringify(t)}>`);
|
|
65
65
|
// Assign the value to the specified key in the object
|
|
66
66
|
return Object.assign(t ?? (typeof k === 'number' ? [] : Object.create(null)), {
|
|
67
67
|
[k]: set(t?.[k], r.join('.'), value)
|
|
@@ -1680,7 +1680,7 @@ const factory = Object.create(null);
|
|
|
1680
1680
|
function Registry(reg, ctor) {
|
|
1681
1681
|
// Throws an error if the registry already exists
|
|
1682
1682
|
if (reg in registry || reg in factory)
|
|
1683
|
-
throw new Error(`
|
|
1683
|
+
throw new Error(`Registry <${reg}> already exists / overwriting is forbidden`);
|
|
1684
1684
|
// Create a registry object to hold class constructors
|
|
1685
1685
|
const classes = Object.create(null);
|
|
1686
1686
|
const service = {
|
|
@@ -1695,9 +1695,9 @@ function Registry(reg, ctor) {
|
|
|
1695
1695
|
*/
|
|
1696
1696
|
add(name, cls, update = false) {
|
|
1697
1697
|
if (!(cls.prototype instanceof ctor))
|
|
1698
|
-
throw new TypeError(`
|
|
1698
|
+
throw new TypeError(`Class must extend <${reg}>`);
|
|
1699
1699
|
if (!update && name in classes)
|
|
1700
|
-
throw new Error(`
|
|
1700
|
+
throw new Error(`Entry <${name}> already exists / use <update=true> to overwrite`);
|
|
1701
1701
|
classes[name] = cls;
|
|
1702
1702
|
},
|
|
1703
1703
|
/**
|
|
@@ -1728,7 +1728,7 @@ function Registry(reg, ctor) {
|
|
|
1728
1728
|
*/
|
|
1729
1729
|
get(name) {
|
|
1730
1730
|
if (!(name in classes))
|
|
1731
|
-
throw new Error(`
|
|
1731
|
+
throw new Error(`Class <${name}> not registered for <${reg}>`);
|
|
1732
1732
|
return classes[name];
|
|
1733
1733
|
}
|
|
1734
1734
|
};
|
|
@@ -1749,7 +1749,7 @@ function Registry(reg, ctor) {
|
|
|
1749
1749
|
*/
|
|
1750
1750
|
function resolveCls(reg, cls) {
|
|
1751
1751
|
if (!(reg in registry))
|
|
1752
|
-
throw new ReferenceError(`
|
|
1752
|
+
throw new ReferenceError(`Registry <${reg}> does not exist`);
|
|
1753
1753
|
return (typeof cls === 'string' ? registry[reg]?.get(cls) : cls);
|
|
1754
1754
|
}
|
|
1755
1755
|
/**
|
|
@@ -1767,7 +1767,9 @@ function createFromRegistry(reg, cls, ...args) {
|
|
|
1767
1767
|
return new cls(...args);
|
|
1768
1768
|
}
|
|
1769
1769
|
catch (err) {
|
|
1770
|
-
throw new Error(`
|
|
1770
|
+
throw new Error(`Cannot instantiate class <${cls}>`, {
|
|
1771
|
+
cause: err
|
|
1772
|
+
});
|
|
1771
1773
|
}
|
|
1772
1774
|
}
|
|
1773
1775
|
|
|
@@ -1865,7 +1867,7 @@ class Metric {
|
|
|
1865
1867
|
this.b = Array.isArray(b) ? b : [b];
|
|
1866
1868
|
// Validate inputs: ensure they are not empty
|
|
1867
1869
|
if (this.a.length === 0 || this.b.length === 0)
|
|
1868
|
-
throw new Error(`
|
|
1870
|
+
throw new Error(`Inputs <a> and <b> must not be empty`);
|
|
1869
1871
|
// Set options
|
|
1870
1872
|
this.options = opt;
|
|
1871
1873
|
this.symmetric = symmetric;
|
|
@@ -1902,7 +1904,7 @@ class Metric {
|
|
|
1902
1904
|
* @throws {Error} - If not overridden in a subclass
|
|
1903
1905
|
*/
|
|
1904
1906
|
compute(a, b, m, n, maxLen) {
|
|
1905
|
-
throw new Error(`
|
|
1907
|
+
throw new Error(`Method compute() must be overridden in a subclass`);
|
|
1906
1908
|
}
|
|
1907
1909
|
/**
|
|
1908
1910
|
* Run the metric computation for single inputs (two strings).
|
|
@@ -2059,7 +2061,7 @@ class Metric {
|
|
|
2059
2061
|
*/
|
|
2060
2062
|
isPairwise(safe = false) {
|
|
2061
2063
|
return this.isBatch() && this.a.length === this.b.length ? true : !safe && (() => {
|
|
2062
|
-
throw new Error(`
|
|
2064
|
+
throw new Error(`Mode <pairwise> requires arrays of equal length`);
|
|
2063
2065
|
})();
|
|
2064
2066
|
}
|
|
2065
2067
|
/**
|
|
@@ -2120,7 +2122,7 @@ class Metric {
|
|
|
2120
2122
|
this.runPairwise();
|
|
2121
2123
|
break;
|
|
2122
2124
|
// Unsupported mode
|
|
2123
|
-
default: throw new Error(`
|
|
2125
|
+
default: throw new Error(`Unsupported mode <${mode}>`);
|
|
2124
2126
|
}
|
|
2125
2127
|
}
|
|
2126
2128
|
/**
|
|
@@ -2155,7 +2157,7 @@ class Metric {
|
|
|
2155
2157
|
await this.runPairwiseAsync();
|
|
2156
2158
|
break;
|
|
2157
2159
|
// Unsupported mode
|
|
2158
|
-
default: throw new Error(`
|
|
2160
|
+
default: throw new Error(`Unsupported async mode <${mode}>`);
|
|
2159
2161
|
}
|
|
2160
2162
|
}
|
|
2161
2163
|
/**
|
|
@@ -2698,7 +2700,7 @@ class HammingDistance extends Metric {
|
|
|
2698
2700
|
}
|
|
2699
2701
|
// Standard: Error for unequal length
|
|
2700
2702
|
else
|
|
2701
|
-
throw new Error(`
|
|
2703
|
+
throw new Error(`Strings must be of equal length for Hamming Distance, a=${m} and b=${n} given, ` +
|
|
2702
2704
|
`use option.pad for automatic adjustment`);
|
|
2703
2705
|
}
|
|
2704
2706
|
// Calculate the Hamming distance
|
|
@@ -3347,8 +3349,8 @@ MetricRegistry.add('smithWaterman', SmithWatermanDistance);
|
|
|
3347
3349
|
* pose a risk of infringing upon existing trademarks due to their pronunciation.
|
|
3348
3350
|
*
|
|
3349
3351
|
* This module provides an abstract class for generating phonetic indices based
|
|
3350
|
-
* on mappings and rules. It allows for the implementation of various
|
|
3351
|
-
* algorithms by extending the abstract class.
|
|
3352
|
+
* on mappings, patterns and rules. It allows for the implementation of various
|
|
3353
|
+
* phonetic algorithms by extending the abstract class.
|
|
3352
3354
|
*
|
|
3353
3355
|
* @module Phonetic
|
|
3354
3356
|
* @author Paul Köhler (komed3)
|
|
@@ -3388,22 +3390,55 @@ class Phonetic {
|
|
|
3388
3390
|
* Constructor for the Phonetic class.
|
|
3389
3391
|
*
|
|
3390
3392
|
* Initializes the phonetic algorithm with the specified options and mapping.
|
|
3393
|
+
* Options hierarchy: User input > mapping options > default
|
|
3391
3394
|
*
|
|
3392
3395
|
* @param {string} algo - The name of the algorithm (e.g. 'soundex')
|
|
3393
3396
|
* @param {PhoneticOptions} [opt] - Options for the phonetic algorithm
|
|
3394
3397
|
* @throws {Error} - If the requested mapping is not declared
|
|
3395
3398
|
*/
|
|
3396
3399
|
constructor(algo, opt = {}) {
|
|
3397
|
-
//
|
|
3398
|
-
|
|
3399
|
-
//
|
|
3400
|
-
const
|
|
3400
|
+
// Get the phonetic default options
|
|
3401
|
+
const defaults = this.constructor.default ?? {};
|
|
3402
|
+
// Determine phonetic map ID from options or use defaults
|
|
3403
|
+
const mapId = opt.map ?? defaults.map;
|
|
3404
|
+
// If no mapping is specified (neither in the options nor in the defaults), throw an error
|
|
3405
|
+
if (!mapId)
|
|
3406
|
+
throw new Error(`No mapping specified for phonetic algorithm`);
|
|
3407
|
+
// Get the mapping based on the determined map ID
|
|
3408
|
+
const map = PhoneticMappingRegistry.get(algo, mapId);
|
|
3401
3409
|
// If the mapping is not defined, throw an error
|
|
3402
3410
|
if (map === undefined)
|
|
3403
|
-
throw new Error(`
|
|
3411
|
+
throw new Error(`Requested mapping <${mapId}> is not declared`);
|
|
3412
|
+
// Set the options by merging the default options with the provided ones
|
|
3413
|
+
this.options = merge(merge(defaults, map.options ?? {}), opt);
|
|
3414
|
+
// Set the algorithm name and mapping
|
|
3404
3415
|
this.algo = algo;
|
|
3405
3416
|
this.map = map;
|
|
3406
3417
|
}
|
|
3418
|
+
/**
|
|
3419
|
+
* Applies patterns to a word based on the phonetic map.
|
|
3420
|
+
*
|
|
3421
|
+
* This method processes the word by applying all defined patterns in the
|
|
3422
|
+
* phonetic map. It replaces occurrences of specified patterns with their
|
|
3423
|
+
* corresponding replacements.
|
|
3424
|
+
*
|
|
3425
|
+
* @param {string} word - The input word to be processed
|
|
3426
|
+
* @returns {string} - The modified word after applying all patterns
|
|
3427
|
+
*/
|
|
3428
|
+
applyPattern(word) {
|
|
3429
|
+
const { patterns = [] } = this.map;
|
|
3430
|
+
// If no patterns are provided, return the input
|
|
3431
|
+
if (!patterns || !patterns.length)
|
|
3432
|
+
return word;
|
|
3433
|
+
// Iterate over the patterns and replace all matches
|
|
3434
|
+
for (const { pattern, replace, all = false } of patterns) {
|
|
3435
|
+
// Search for the pattern in the word and replace it
|
|
3436
|
+
// Use replaceAll if 'all' is true, otherwise use replace
|
|
3437
|
+
word = word[all ? 'replaceAll' : 'replace'](pattern, replace);
|
|
3438
|
+
}
|
|
3439
|
+
// Return the modified word after applying all patterns
|
|
3440
|
+
return word;
|
|
3441
|
+
}
|
|
3407
3442
|
/**
|
|
3408
3443
|
* Applies phonetic rules to a character in a word context.
|
|
3409
3444
|
*
|
|
@@ -3481,6 +3516,9 @@ class Phonetic {
|
|
|
3481
3516
|
*/
|
|
3482
3517
|
encode(word) {
|
|
3483
3518
|
const { map = {}, ignore = [] } = this.map;
|
|
3519
|
+
// Apply patterns to the word before processing
|
|
3520
|
+
// This allows for pre-processing of the word based on defined patterns
|
|
3521
|
+
word = this.applyPattern(word);
|
|
3484
3522
|
// Get the characters of the word and its length
|
|
3485
3523
|
const chars = this.word2Chars(word);
|
|
3486
3524
|
const charLen = chars.length;
|
|
@@ -3517,11 +3555,11 @@ class Phonetic {
|
|
|
3517
3555
|
* @returns {string|undefined} - The phonetic code or undefined if no code applies
|
|
3518
3556
|
*/
|
|
3519
3557
|
mapChar(char, i, chars, charLen, lastCode, map) {
|
|
3520
|
-
const { dedupe = true } = this.options;
|
|
3558
|
+
const { dedupe = true, fallback = undefined } = this.options;
|
|
3521
3559
|
// Apply phonetic rules to the character
|
|
3522
3560
|
// If no rules apply, use the mapping
|
|
3523
|
-
// If the character is not in the mapping, return
|
|
3524
|
-
const c = this.applyRules(char, i, chars, charLen) ?? map[char] ??
|
|
3561
|
+
// If the character is not in the mapping, return the fallback
|
|
3562
|
+
const c = this.applyRules(char, i, chars, charLen) ?? map[char] ?? fallback;
|
|
3525
3563
|
// De-duplicate the code if necessary
|
|
3526
3564
|
return dedupe && c === lastCode ? undefined : c;
|
|
3527
3565
|
}
|
|
@@ -3674,7 +3712,7 @@ const PhoneticMappingRegistry = (() => {
|
|
|
3674
3712
|
add(algo, id, map, update = false) {
|
|
3675
3713
|
const mappings = maps(algo);
|
|
3676
3714
|
if (!update && id in mappings)
|
|
3677
|
-
throw new Error(`
|
|
3715
|
+
throw new Error(`Entry <${id}> already exists / use <update=true> to overwrite`);
|
|
3678
3716
|
mappings[id] = map;
|
|
3679
3717
|
},
|
|
3680
3718
|
/**
|
|
@@ -3710,6 +3748,188 @@ const PhoneticMappingRegistry = (() => {
|
|
|
3710
3748
|
};
|
|
3711
3749
|
})();
|
|
3712
3750
|
|
|
3751
|
+
/**
|
|
3752
|
+
* Caverphone Phonetic Algorithm
|
|
3753
|
+
* src/phonetic/Caverphone.ts
|
|
3754
|
+
*
|
|
3755
|
+
* @see https://en.wikipedia.org/wiki/Caverphone
|
|
3756
|
+
*
|
|
3757
|
+
* This module implements the Caverphone phonetic algorithm, which is designed
|
|
3758
|
+
* to encode words into a phonetic representation. The Caverphone algorithm is
|
|
3759
|
+
* used primarily in New Zealand and was developed to assist in the indexing of
|
|
3760
|
+
* names in genealogical databases.
|
|
3761
|
+
*
|
|
3762
|
+
* It converts words into a standardized phonetic code, allowing for variations
|
|
3763
|
+
* in spelling and pronunciation to be matched.
|
|
3764
|
+
*
|
|
3765
|
+
* @module Phonetic/Caverphone
|
|
3766
|
+
* @author Paul Köhler (komed3)
|
|
3767
|
+
* @license MIT
|
|
3768
|
+
*/
|
|
3769
|
+
/**
|
|
3770
|
+
* Caverphone class extends the Phonetic class to implement the Caverphone phonetic algorithm.
|
|
3771
|
+
*/
|
|
3772
|
+
class Caverphone extends Phonetic {
|
|
3773
|
+
// Default options for the Caverphone phonetic algorithm
|
|
3774
|
+
static default = {
|
|
3775
|
+
map: 'en2', delimiter: ' ', length: -1, pad: '', dedupe: false
|
|
3776
|
+
};
|
|
3777
|
+
/**
|
|
3778
|
+
* Constructor for the Caverphone class.
|
|
3779
|
+
*
|
|
3780
|
+
* Initializes the Caverphone phonetic algorithm with the mapping and options.
|
|
3781
|
+
*
|
|
3782
|
+
* @param {PhoneticOptions} [opt] - Options for the Caverphone phonetic algorithm
|
|
3783
|
+
*/
|
|
3784
|
+
constructor(opt = {}) { super('caverphone', opt); }
|
|
3785
|
+
/**
|
|
3786
|
+
* Generates the Caverphone code for a given word.
|
|
3787
|
+
*
|
|
3788
|
+
* @param {string} word - The input word to be converted into a Caverphone code
|
|
3789
|
+
* @returns {string} - The generated Caverphone code
|
|
3790
|
+
*/
|
|
3791
|
+
encode(word) {
|
|
3792
|
+
// Remove anything not A-Z and convert to lowercase
|
|
3793
|
+
word = word.replace(/[^A-Z]/gi, '').toLowerCase();
|
|
3794
|
+
// Use the base implementation for rule/mapping application
|
|
3795
|
+
return super.encode(word);
|
|
3796
|
+
}
|
|
3797
|
+
/**
|
|
3798
|
+
* Overrides the mapChar method to skip character mapping.
|
|
3799
|
+
*
|
|
3800
|
+
* @param {string} char - The character to be mapped
|
|
3801
|
+
* @returns {string} - The mapped character
|
|
3802
|
+
*/
|
|
3803
|
+
mapChar(char) { return char; }
|
|
3804
|
+
/**
|
|
3805
|
+
* Adjusts the phonetic code to uppercase.
|
|
3806
|
+
*
|
|
3807
|
+
* @param {string} code - The phonetic code to adjust
|
|
3808
|
+
* @returns {string} - The adjusted phonetic code
|
|
3809
|
+
*/
|
|
3810
|
+
adjustCode(code) { return code.toUpperCase(); }
|
|
3811
|
+
}
|
|
3812
|
+
// Register the Caverphone algorithm in the phonetic registry
|
|
3813
|
+
PhoneticRegistry.add('caverphone', Caverphone);
|
|
3814
|
+
// Register the Caverphone 1.0 phonetic mapping for English
|
|
3815
|
+
PhoneticMappingRegistry.add('caverphone', 'en1', {
|
|
3816
|
+
options: { length: 6, pad: '1' },
|
|
3817
|
+
map: {},
|
|
3818
|
+
patterns: [
|
|
3819
|
+
// Special word-initial replacements
|
|
3820
|
+
{ pattern: /^(c|r|t|en)ough/, replace: '$1ou2f' },
|
|
3821
|
+
{ pattern: /^gn/, replace: '2n' },
|
|
3822
|
+
// Special word-final replacement
|
|
3823
|
+
{ pattern: /mb$/, replace: 'm2' },
|
|
3824
|
+
// Character group replacements
|
|
3825
|
+
{ pattern: /cq/g, replace: '2q' },
|
|
3826
|
+
{ pattern: /c(e|i|y)/g, replace: 's$1' },
|
|
3827
|
+
{ pattern: /tch/g, replace: '2ch' },
|
|
3828
|
+
{ pattern: /[cqx]/g, replace: 'k' },
|
|
3829
|
+
{ pattern: /v/g, replace: 'f' },
|
|
3830
|
+
{ pattern: /dg/g, replace: '2g' },
|
|
3831
|
+
{ pattern: /ti(a|o)/g, replace: 'si$1' },
|
|
3832
|
+
{ pattern: /d/g, replace: 't' },
|
|
3833
|
+
{ pattern: /ph/g, replace: 'fh' },
|
|
3834
|
+
{ pattern: /b/g, replace: 'p' },
|
|
3835
|
+
{ pattern: /sh/g, replace: 's2' },
|
|
3836
|
+
{ pattern: /z/g, replace: 's' },
|
|
3837
|
+
// Vowel handling
|
|
3838
|
+
{ pattern: /^[aeiou]/, replace: 'A' },
|
|
3839
|
+
{ pattern: /[aeiou]/g, replace: '3' },
|
|
3840
|
+
// Special gh handling
|
|
3841
|
+
{ pattern: /3gh3/g, replace: '3kh3' },
|
|
3842
|
+
{ pattern: /gh/g, replace: '22' },
|
|
3843
|
+
// Single character replacements
|
|
3844
|
+
{ pattern: /g/g, replace: 'k' },
|
|
3845
|
+
// Collapse repeated consonants
|
|
3846
|
+
{ pattern: /s+/g, replace: 'S' },
|
|
3847
|
+
{ pattern: /t+/g, replace: 'T' },
|
|
3848
|
+
{ pattern: /p+/g, replace: 'P' },
|
|
3849
|
+
{ pattern: /k+/g, replace: 'K' },
|
|
3850
|
+
{ pattern: /f+/g, replace: 'F' },
|
|
3851
|
+
{ pattern: /m+/g, replace: 'M' },
|
|
3852
|
+
{ pattern: /n+/g, replace: 'N' },
|
|
3853
|
+
// Y and other single-letter handling
|
|
3854
|
+
{ pattern: /j/g, replace: 'y' },
|
|
3855
|
+
// L/R/W/Y3 handling
|
|
3856
|
+
{ pattern: /l3/g, replace: 'L3' },
|
|
3857
|
+
{ pattern: /r3/g, replace: 'R3' },
|
|
3858
|
+
{ pattern: /w3/g, replace: 'W3' },
|
|
3859
|
+
{ pattern: /y3/g, replace: 'Y3' },
|
|
3860
|
+
// L/R/W followed by y
|
|
3861
|
+
{ pattern: /ly/g, replace: 'Ly' },
|
|
3862
|
+
{ pattern: /ry/g, replace: 'Ry' },
|
|
3863
|
+
{ pattern: /wy/g, replace: 'Wy' },
|
|
3864
|
+
// WH handling
|
|
3865
|
+
{ pattern: /wh3/g, replace: 'Wh3' },
|
|
3866
|
+
{ pattern: /why/g, replace: 'Why' },
|
|
3867
|
+
// H at start
|
|
3868
|
+
{ pattern: /^h/, replace: 'A' },
|
|
3869
|
+
// Remove certain letters
|
|
3870
|
+
{ pattern: /[hlrwy23]/g, replace: '' }
|
|
3871
|
+
]
|
|
3872
|
+
});
|
|
3873
|
+
// Register the Caverphone 2.0 phonetic mapping for English
|
|
3874
|
+
PhoneticMappingRegistry.add('caverphone', 'en2', {
|
|
3875
|
+
options: { length: 10, pad: '1' },
|
|
3876
|
+
map: {},
|
|
3877
|
+
patterns: [
|
|
3878
|
+
// Remove trailing 'e'
|
|
3879
|
+
{ pattern: /e$/, replace: '' },
|
|
3880
|
+
// Special word-initial replacements
|
|
3881
|
+
{ pattern: /^(c|r|t|en|tr)ough/, replace: '$1ou2f' },
|
|
3882
|
+
{ pattern: /^gn/, replace: '2n' },
|
|
3883
|
+
// Special word-final replacement
|
|
3884
|
+
{ pattern: /mb$/, replace: 'm2' },
|
|
3885
|
+
// Character group replacements
|
|
3886
|
+
{ pattern: /cq/g, replace: '2q' },
|
|
3887
|
+
{ pattern: /c(e|i|y)/g, replace: 's$1' },
|
|
3888
|
+
{ pattern: /tch/g, replace: '2ch' },
|
|
3889
|
+
{ pattern: /[cqx]/g, replace: 'k' },
|
|
3890
|
+
{ pattern: /v/g, replace: 'f' },
|
|
3891
|
+
{ pattern: /dg/g, replace: '2g' },
|
|
3892
|
+
{ pattern: /ti(a|o)/g, replace: 'si$1' },
|
|
3893
|
+
{ pattern: /d/g, replace: 't' },
|
|
3894
|
+
{ pattern: /ph/g, replace: 'fh' },
|
|
3895
|
+
{ pattern: /b/g, replace: 'p' },
|
|
3896
|
+
{ pattern: /sh/g, replace: 's2' },
|
|
3897
|
+
{ pattern: /z/g, replace: 's' },
|
|
3898
|
+
// Vowel handling
|
|
3899
|
+
{ pattern: /^[aeiou]/, replace: 'A' },
|
|
3900
|
+
{ pattern: /[aeiou]/g, replace: '3' },
|
|
3901
|
+
// Y handling
|
|
3902
|
+
{ pattern: /j/g, replace: 'y' },
|
|
3903
|
+
{ pattern: /^y3/, replace: 'Y3' },
|
|
3904
|
+
{ pattern: /^y/, replace: 'A' },
|
|
3905
|
+
{ pattern: /y/g, replace: '3' },
|
|
3906
|
+
// Special gh handling
|
|
3907
|
+
{ pattern: /3gh3/g, replace: '3kh3' },
|
|
3908
|
+
{ pattern: /gh/g, replace: '22' },
|
|
3909
|
+
// Single character replacements
|
|
3910
|
+
{ pattern: /g/g, replace: 'k' },
|
|
3911
|
+
// Collapse repeated consonants
|
|
3912
|
+
{ pattern: /s+/g, replace: 'S' },
|
|
3913
|
+
{ pattern: /t+/g, replace: 'T' },
|
|
3914
|
+
{ pattern: /p+/g, replace: 'P' },
|
|
3915
|
+
{ pattern: /k+/g, replace: 'K' },
|
|
3916
|
+
{ pattern: /f+/g, replace: 'F' },
|
|
3917
|
+
{ pattern: /m+/g, replace: 'M' },
|
|
3918
|
+
{ pattern: /n+/g, replace: 'N' },
|
|
3919
|
+
// L/R/W3 handling
|
|
3920
|
+
{ pattern: /l3/g, replace: 'L3' },
|
|
3921
|
+
{ pattern: /r3/g, replace: 'R3' },
|
|
3922
|
+
{ pattern: /w3/g, replace: 'W3' },
|
|
3923
|
+
{ pattern: /wh3/g, replace: 'Wh3' },
|
|
3924
|
+
{ pattern: /[lrw]$/, replace: '3' },
|
|
3925
|
+
// H at start and final 3 handling
|
|
3926
|
+
{ pattern: /^h/, replace: 'A' },
|
|
3927
|
+
{ pattern: /3$/, replace: 'A' },
|
|
3928
|
+
// Remove certain letters
|
|
3929
|
+
{ pattern: /[hlrw23]/g, replace: '' }
|
|
3930
|
+
]
|
|
3931
|
+
});
|
|
3932
|
+
|
|
3713
3933
|
/**
|
|
3714
3934
|
* Cologne Phonetic Algorithm
|
|
3715
3935
|
* src/phonetic/Cologne.ts
|
|
@@ -4258,6 +4478,12 @@ class CmpStr {
|
|
|
4258
4478
|
// Prepare the input
|
|
4259
4479
|
const A = skip ? a : this.prepare(a, resolved);
|
|
4260
4480
|
const B = skip ? b : this.prepare(b, resolved);
|
|
4481
|
+
// If the inputs are empty and safeEmpty is enabled, return an empty array
|
|
4482
|
+
if (resolved.safeEmpty && ((Array.isArray(A) && A.length === 0) ||
|
|
4483
|
+
(Array.isArray(B) && B.length === 0) ||
|
|
4484
|
+
A === '' || B === '')) {
|
|
4485
|
+
return [];
|
|
4486
|
+
}
|
|
4261
4487
|
// Get the metric class
|
|
4262
4488
|
const metric = factory.metric(resolved.metric, A, B, resolved.opt);
|
|
4263
4489
|
// Pass the original inputs to the metric
|
|
@@ -4688,6 +4914,12 @@ class CmpStrAsync extends CmpStr {
|
|
|
4688
4914
|
// Prepare the input
|
|
4689
4915
|
const A = skip ? a : await this.prepareAsync(a, resolved);
|
|
4690
4916
|
const B = skip ? b : await this.prepareAsync(b, resolved);
|
|
4917
|
+
// If the inputs are empty and safeEmpty is enabled, return an empty array
|
|
4918
|
+
if (resolved.safeEmpty && ((Array.isArray(A) && A.length === 0) ||
|
|
4919
|
+
(Array.isArray(B) && B.length === 0) ||
|
|
4920
|
+
A === '' || B === '')) {
|
|
4921
|
+
return [];
|
|
4922
|
+
}
|
|
4691
4923
|
// Get the metric class
|
|
4692
4924
|
const metric = factory.metric(resolved.metric, A, B, resolved.opt);
|
|
4693
4925
|
// Pass the original inputs to the metric
|