@vspro/name-match-pro 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/dist/examples/example.d.ts +1 -0
- package/dist/examples/example.js +41 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +17 -0
- package/dist/utils/nameMatchPro.d.ts +15 -0
- package/dist/utils/nameMatchPro.js +182 -0
- package/package.json +17 -0
- package/src/examples/example.ts +41 -0
- package/src/index.ts +1 -0
- package/src/utils/nameMatchPro.ts +199 -0
- package/tsconfig.json +13 -0
- package/vspro-name-match-pro-1.0.0.tgz +0 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 vspro
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# @vspro/name-match-pro
|
|
2
|
+
|
|
3
|
+
A robust name matching library for Node.js and TypeScript that combines multiple strategies — initials handling, token swapping, fuzzy similarity, and prefix normalization — for accurate name matching in fintech, insurance, CRM, KYC, OCR and other applications.
|
|
4
|
+
|
|
5
|
+
[View on npm](https://www.npmjs.com/package/@vspro/name-match-pro)
|
|
6
|
+

|
|
7
|
+

|
|
8
|
+

|
|
9
|
+

|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Features
|
|
14
|
+
|
|
15
|
+
* **Multi-strategy Matching**: Combines exact match, initials handling, swapped tokens, and fuzzy Levenshtein similarity.
|
|
16
|
+
* **Prefix Normalization**: Removes common prefixes like Mr, Mrs, Dr, Shri, Smt, etc.
|
|
17
|
+
* **Initials Expansion & Merge**: Converts `K.S.` → `KS`, `KS` → `K S`, or merges initials intelligently.
|
|
18
|
+
* **Swapped Tokens Handling**: Recognizes names in different order
|
|
19
|
+
(e.g., "Vijay Kumar Sharma" → "Sharma Vijay Kumar"),
|
|
20
|
+
and scores initial-only forms against full names (e.g., "Vijay Kumar Sharma" → "Vijay K S").
|
|
21
|
+
* **Scoring System**: Returns percentage similarity and descriptive remarks: Exact Match, High Similarity, Possible Match, Low Match.
|
|
22
|
+
* **Fuzzy Matching**: Handles typos and variations using Levenshtein distance.
|
|
23
|
+
* **Node.js & TypeScript Ready**: Works in JS, TS, and frameworks like Next.js.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
npm install @vspro/name-match-pro
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Or yarn:
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
yarn add @vspro/name-match-pro
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Quick Start (JavaScript)
|
|
42
|
+
|
|
43
|
+
```javascript
|
|
44
|
+
const { matchNames } = require('@vspro/name-match-pro');
|
|
45
|
+
|
|
46
|
+
const result = matchNames('Sagar Kumar Jangid', 'Sagar Kumar J');
|
|
47
|
+
|
|
48
|
+
console.log(result);
|
|
49
|
+
/*
|
|
50
|
+
{
|
|
51
|
+
inputName: 'Sagar Kumar Jangid',
|
|
52
|
+
givenName: 'Sagar Kumar J',
|
|
53
|
+
percentage: 95,
|
|
54
|
+
remark: 'High Similarity'
|
|
55
|
+
}
|
|
56
|
+
*/
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## TypeScript Example
|
|
62
|
+
|
|
63
|
+
```ts
|
|
64
|
+
import { matchNames } from '@vspro/name-match-pro';
|
|
65
|
+
|
|
66
|
+
const result = matchNames('Sagar Kumar Jangid', 'Sagar Kumar J');
|
|
67
|
+
|
|
68
|
+
console.log(result);
|
|
69
|
+
/*
|
|
70
|
+
{
|
|
71
|
+
inputName: 'Sagar Kumar Jangid',
|
|
72
|
+
givenName: 'Sagar Kumar J',
|
|
73
|
+
percentage: 95,
|
|
74
|
+
remark: 'High Similarity'
|
|
75
|
+
}
|
|
76
|
+
*/
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
---
|
|
80
|
+
|
|
81
|
+
## Next.js API Example
|
|
82
|
+
|
|
83
|
+
```ts
|
|
84
|
+
// pages/api/match.ts
|
|
85
|
+
import type { NextApiRequest, NextApiResponse } from 'next';
|
|
86
|
+
import { matchNames } from '@vspro/name-match-pro';
|
|
87
|
+
|
|
88
|
+
export default function handler(req: NextApiRequest, res: NextApiResponse) {
|
|
89
|
+
const { inputName, givenName } = req.query;
|
|
90
|
+
|
|
91
|
+
if (!inputName || !givenName) {
|
|
92
|
+
return res.status(400).json({ error: 'inputName and givenName are required' });
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
const result = matchNames(inputName as string, givenName as string);
|
|
96
|
+
res.status(200).json(result);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
/*
|
|
100
|
+
Request:
|
|
101
|
+
GET /api/match?inputName=Gowri%20K%20S&givenName=GOWRI%20KS
|
|
102
|
+
|
|
103
|
+
Response:
|
|
104
|
+
{
|
|
105
|
+
"inputName": "Gowri K S",
|
|
106
|
+
"givenName": "GOWRI KS",
|
|
107
|
+
"percentage": 99,
|
|
108
|
+
"remark": "High Similarity"
|
|
109
|
+
}
|
|
110
|
+
*/
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## Advanced Usage
|
|
116
|
+
|
|
117
|
+
```ts
|
|
118
|
+
import { matchNames } from '@vspro/name-match-pro';
|
|
119
|
+
|
|
120
|
+
const examples = [
|
|
121
|
+
{ input: 'K S Gowri', given: 'Gowri K S' },
|
|
122
|
+
{ input: 'Yadav Vijaysinh Ishwarsinh', given: 'VIJAYSINH ISHWARSINH YADAV' },
|
|
123
|
+
{ input: 'Shilpa Deshpande', given: 'Shilpa P Deshpande' }
|
|
124
|
+
];
|
|
125
|
+
|
|
126
|
+
examples.forEach(({ input, given }) => {
|
|
127
|
+
const result = matchNames(input, given);
|
|
128
|
+
console.log({ input, given, result });
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
/*
|
|
132
|
+
Sample Output:
|
|
133
|
+
|
|
134
|
+
{ input: 'K S Gowri', given: 'Gowri K S', result: { inputName: 'K S Gowri', givenName: 'Gowri K S', percentage: 99, remark: 'High Similarity' } }
|
|
135
|
+
|
|
136
|
+
{ input: 'Yadav Vijaysinh Ishwarsinh', given: 'VIJAYSINH ISHWARSINH YADAV', result: { inputName: 'Yadav Vijaysinh Ishwarsinh', givenName: 'VIJAYSINH ISHWARSINH YADAV', percentage: 99, remark: 'High Similarity' } }
|
|
137
|
+
|
|
138
|
+
{ input: 'Shilpa Deshpande', given: 'Shilpa P Deshpande', result: { inputName: 'Shilpa Deshpande', givenName: 'Shilpa P Deshpande', percentage: 94, remark: 'High Similarity' } }
|
|
139
|
+
*/
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## How It Works
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
1. **Matching Strategies**
|
|
148
|
+
|
|
149
|
+
* **Exact Match** → 100%
|
|
150
|
+
* **Swapped Token Match** → 99%
|
|
151
|
+
* **Full Token Containment** → 90–99%
|
|
152
|
+
* **Fuzzy Levenshtein Match** → 0–97% (raw scores of 98 or higher are lowered to 95)
|
|
153
|
+
* **Initial-only Downgrade** → Avoids false positives
|
|
154
|
+
|
|
155
|
+
2. **Scoring & Remark**
|
|
156
|
+
|
|
157
|
+
* 100 → Exact Match
|
|
158
|
+
* 90–99 → High Similarity
|
|
159
|
+
* 70–89 → Possible Match
|
|
160
|
+
* <70 → Low Match
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Challenges Handled
|
|
165
|
+
|
|
166
|
+
* Different name orders (first last vs. last, first)
|
|
167
|
+
* Middle names and initials
|
|
168
|
+
* Nicknames and formal names
|
|
169
|
+
* Suffixes (Jr, Sr, III)
|
|
170
|
+
* Titles and prefixes (Mr, Dr, Smt)
|
|
171
|
+
* Hyphenated or compound names
|
|
172
|
+
* Case differences, spacing variations, special characters
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
## License
|
|
178
|
+
|
|
179
|
+
MIT License © 2026 vspro
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Keywords
|
|
184
|
+
|
|
185
|
+
`name-matching`, `name-matching-kyc`, `name-matching-ocr`, `fuzzy-match`, `initials`, `levenshtein`, `typescript`, `nodejs`, `nextjs`, `npm-package`, `name-validation`, `crm`, `fintech`, `insurance`, `KYC`, `OCR`, `CRM`
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const nameMatchPro_1 = require("../utils/nameMatchPro");
|
|
4
|
+
const examples = [
|
|
5
|
+
{ input: "Vikash Yadav Luniwal", given: "Vikash Y L" },
|
|
6
|
+
{ input: "Vikash Yadav Luniwal", given: "Vikash Yadav" },
|
|
7
|
+
{ input: "Vikash Yadav Luniwal", given: "Vikash Luniwal Yadav" },
|
|
8
|
+
{ input: "Vikash Yadav Luniwal", given: "Vikash Yadav Luniwal" },
|
|
9
|
+
];
|
|
10
|
+
examples.forEach(({ input, given }) => {
|
|
11
|
+
const result = (0, nameMatchPro_1.matchNames)(input, given);
|
|
12
|
+
console.log(result);
|
|
13
|
+
});
|
|
14
|
+
/*
|
|
15
|
+
OUTPUT :
|
|
16
|
+
|
|
17
|
+
{
|
|
18
|
+
inputName: 'Vikash Yadav Luniwal',
|
|
19
|
+
givenName: 'Vikash Y L',
|
|
20
|
+
percentage: 90,
|
|
21
|
+
remark: 'High Similarity'
|
|
22
|
+
}
|
|
23
|
+
{
|
|
24
|
+
inputName: 'Vikash Yadav Luniwal',
|
|
25
|
+
givenName: 'Vikash Yadav',
|
|
26
|
+
percentage: 94,
|
|
27
|
+
remark: 'High Similarity'
|
|
28
|
+
}
|
|
29
|
+
{
|
|
30
|
+
inputName: 'Vikash Yadav Luniwal',
|
|
31
|
+
givenName: 'Vikash Luniwal Yadav',
|
|
32
|
+
percentage: 99,
|
|
33
|
+
remark: 'High Similarity'
|
|
34
|
+
}
|
|
35
|
+
{
|
|
36
|
+
inputName: 'Vikash Yadav Luniwal',
|
|
37
|
+
givenName: 'Vikash Yadav Luniwal',
|
|
38
|
+
percentage: 100,
|
|
39
|
+
remark: 'Exact Match'
|
|
40
|
+
}
|
|
41
|
+
*/
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './utils/nameMatchPro';
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
3
|
+
if (k2 === undefined) k2 = k;
|
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
7
|
+
}
|
|
8
|
+
Object.defineProperty(o, k2, desc);
|
|
9
|
+
}) : (function(o, m, k, k2) {
|
|
10
|
+
if (k2 === undefined) k2 = k;
|
|
11
|
+
o[k2] = m[k];
|
|
12
|
+
}));
|
|
13
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
15
|
+
};
|
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
17
|
+
__exportStar(require("./utils/nameMatchPro"), exports);
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export interface MatchResult {
|
|
2
|
+
inputName: string;
|
|
3
|
+
givenName: string;
|
|
4
|
+
percentage: number;
|
|
5
|
+
remark: string;
|
|
6
|
+
}
|
|
7
|
+
export declare function normalizeName(name: string | undefined): string;
|
|
8
|
+
export declare function mergeInitials(name: string): string;
|
|
9
|
+
export declare function tokenize(name: string): string[];
|
|
10
|
+
export declare function expandCombinedInitials(tokens: string[]): string[];
|
|
11
|
+
export declare function normalizeInitialToken(token: string): string;
|
|
12
|
+
export declare function levenshtein(a: string, b: string): number;
|
|
13
|
+
export declare function isSwappedMatch(tokens1: string[], tokens2: string[]): boolean;
|
|
14
|
+
export declare function isInitialMatch(token1: string, token2: string): boolean;
|
|
15
|
+
export declare function matchNames(inputName: string, givenName: string): MatchResult;
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.normalizeName = normalizeName;
|
|
4
|
+
exports.mergeInitials = mergeInitials;
|
|
5
|
+
exports.tokenize = tokenize;
|
|
6
|
+
exports.expandCombinedInitials = expandCombinedInitials;
|
|
7
|
+
exports.normalizeInitialToken = normalizeInitialToken;
|
|
8
|
+
exports.levenshtein = levenshtein;
|
|
9
|
+
exports.isSwappedMatch = isSwappedMatch;
|
|
10
|
+
exports.isInitialMatch = isInitialMatch;
|
|
11
|
+
exports.matchNames = matchNames;
|
|
12
|
+
const COMMON_PREFIXES = ["mr", "mrs", "ms", "miss", "shri", "smt", "dr"];
|
|
13
|
+
function normalizeName(name) {
|
|
14
|
+
if (!name)
|
|
15
|
+
return "";
|
|
16
|
+
return name
|
|
17
|
+
.toLowerCase()
|
|
18
|
+
.replace(/\./g, "")
|
|
19
|
+
.replace(/\s+/g, " ")
|
|
20
|
+
.trim()
|
|
21
|
+
.split(" ")
|
|
22
|
+
.filter((word) => !COMMON_PREFIXES.includes(word))
|
|
23
|
+
.join(" ");
|
|
24
|
+
}
|
|
25
|
+
function mergeInitials(name) {
|
|
26
|
+
return name.replace(/\b([a-z])\.\s*([a-z])\./g, "$1$2");
|
|
27
|
+
}
|
|
28
|
+
function tokenize(name) {
|
|
29
|
+
return name.split(" ").filter(Boolean);
|
|
30
|
+
}
|
|
31
|
+
function expandCombinedInitials(tokens) {
|
|
32
|
+
let expanded = [];
|
|
33
|
+
for (const token of tokens) {
|
|
34
|
+
if (/^[a-z]{2,4}$/.test(token) && !/[aeiou]/.test(token)) {
|
|
35
|
+
expanded.push(...token.split(""));
|
|
36
|
+
}
|
|
37
|
+
else {
|
|
38
|
+
expanded.push(token);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
return expanded;
|
|
42
|
+
}
|
|
43
|
+
function normalizeInitialToken(token) {
|
|
44
|
+
if (token.length >= 2 && token.length <= 4 && /^[a-z]+$/.test(token) && !/[aeiou]/.test(token)) {
|
|
45
|
+
return token.split("").sort().join("");
|
|
46
|
+
}
|
|
47
|
+
return token;
|
|
48
|
+
}
|
|
49
|
+
function levenshtein(a, b) {
|
|
50
|
+
const matrix = [];
|
|
51
|
+
for (let i = 0; i <= b.length; i++)
|
|
52
|
+
matrix[i] = [i];
|
|
53
|
+
for (let j = 0; j <= a.length; j++)
|
|
54
|
+
matrix[0][j] = j;
|
|
55
|
+
for (let i = 1; i <= b.length; i++) {
|
|
56
|
+
for (let j = 1; j <= a.length; j++) {
|
|
57
|
+
if (b[i - 1] === a[j - 1]) {
|
|
58
|
+
matrix[i][j] = matrix[i - 1][j - 1];
|
|
59
|
+
}
|
|
60
|
+
else {
|
|
61
|
+
matrix[i][j] = Math.min(matrix[i - 1][j - 1] + 1, matrix[i][j - 1] + 1, matrix[i - 1][j] + 1);
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
return matrix[b.length][a.length];
|
|
66
|
+
}
|
|
67
|
+
function isSwappedMatch(tokens1, tokens2) {
|
|
68
|
+
if (tokens1.length !== tokens2.length)
|
|
69
|
+
return false;
|
|
70
|
+
const sorted1 = tokens1.map(normalizeInitialToken).sort().join(" ");
|
|
71
|
+
const sorted2 = tokens2.map(normalizeInitialToken).sort().join(" ");
|
|
72
|
+
return sorted1 === sorted2;
|
|
73
|
+
}
|
|
74
|
+
function isInitialMatch(token1, token2) {
|
|
75
|
+
if (token1.length === 1 && token2.startsWith(token1))
|
|
76
|
+
return true;
|
|
77
|
+
if (token2.length === 1 && token1.startsWith(token2))
|
|
78
|
+
return true;
|
|
79
|
+
return false;
|
|
80
|
+
}
|
|
81
|
+
function matchNames(inputName, givenName) {
|
|
82
|
+
const originalInput = inputName;
|
|
83
|
+
const originalGiven = givenName;
|
|
84
|
+
inputName = mergeInitials(normalizeName(inputName));
|
|
85
|
+
givenName = mergeInitials(normalizeName(givenName));
|
|
86
|
+
let inputTokens = tokenize(inputName);
|
|
87
|
+
let givenTokens = tokenize(givenName);
|
|
88
|
+
inputTokens = expandCombinedInitials(inputTokens);
|
|
89
|
+
givenTokens = expandCombinedInitials(givenTokens);
|
|
90
|
+
let score = 0;
|
|
91
|
+
if (inputName === givenName)
|
|
92
|
+
score = 100;
|
|
93
|
+
else if (isSwappedMatch(inputTokens, givenTokens))
|
|
94
|
+
score = 99;
|
|
95
|
+
else if (givenTokens.every((t) => inputTokens.includes(t)) ||
|
|
96
|
+
inputTokens.every((t) => givenTokens.includes(t))) {
|
|
97
|
+
const longer = Math.max(inputTokens.length, givenTokens.length);
|
|
98
|
+
const shorter = Math.min(inputTokens.length, givenTokens.length);
|
|
99
|
+
const diff = longer - shorter;
|
|
100
|
+
const hasFullTokenMatch = givenTokens.some((t) => t.length > 1 && inputTokens.includes(t));
|
|
101
|
+
if (hasFullTokenMatch) {
|
|
102
|
+
if (diff === 0)
|
|
103
|
+
score = 99;
|
|
104
|
+
else if (diff === 1)
|
|
105
|
+
score = 94;
|
|
106
|
+
else
|
|
107
|
+
score = 90;
|
|
108
|
+
}
|
|
109
|
+
else {
|
|
110
|
+
const matchRatio = shorter / longer;
|
|
111
|
+
score = Math.round(matchRatio * 60);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
else {
|
|
115
|
+
let matchedScore = 0;
|
|
116
|
+
const usedIndexes = new Set();
|
|
117
|
+
for (const g of givenTokens) {
|
|
118
|
+
let bestMatch = 0;
|
|
119
|
+
let bestIndex = -1;
|
|
120
|
+
inputTokens.forEach((i, idx) => {
|
|
121
|
+
if (usedIndexes.has(idx))
|
|
122
|
+
return;
|
|
123
|
+
if (i.startsWith(g) || g.startsWith(i)) {
|
|
124
|
+
const similarity = Math.min(1, (Math.min(i.length, g.length) / Math.max(i.length, g.length)) * 1.1);
|
|
125
|
+
if (similarity > bestMatch) {
|
|
126
|
+
bestMatch = similarity;
|
|
127
|
+
bestIndex = idx;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
const distance = levenshtein(i, g);
|
|
131
|
+
const similarity = 1 - distance / Math.max(i.length, g.length);
|
|
132
|
+
const weighted = Math.min(1, similarity * 1.05);
|
|
133
|
+
if (weighted > bestMatch) {
|
|
134
|
+
bestMatch = weighted;
|
|
135
|
+
bestIndex = idx;
|
|
136
|
+
}
|
|
137
|
+
if (isInitialMatch(i, g)) {
|
|
138
|
+
const initialScore = 0.85;
|
|
139
|
+
if (initialScore > bestMatch) {
|
|
140
|
+
bestMatch = initialScore;
|
|
141
|
+
bestIndex = idx;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
});
|
|
145
|
+
if (bestIndex !== -1) {
|
|
146
|
+
usedIndexes.add(bestIndex);
|
|
147
|
+
matchedScore += bestMatch;
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
const coverage = matchedScore / inputTokens.length;
|
|
151
|
+
const precision = matchedScore / givenTokens.length;
|
|
152
|
+
score = Math.round((coverage * 0.6 + precision * 0.4) * 100);
|
|
153
|
+
if (score >= 98 && inputName !== givenName)
|
|
154
|
+
score = 95;
|
|
155
|
+
}
|
|
156
|
+
const inputHasFull = inputTokens.some((t) => t.length > 1);
|
|
157
|
+
const givenHasFull = givenTokens.some((t) => t.length > 1);
|
|
158
|
+
const inputAllInitials = inputTokens.every((t) => t.length === 1);
|
|
159
|
+
const givenAllInitials = givenTokens.every((t) => t.length === 1);
|
|
160
|
+
const fullTokenMatchExists = givenTokens.some((g) => g.length > 1 && inputTokens.includes(g));
|
|
161
|
+
if ((inputHasFull && givenAllInitials) || (givenHasFull && inputAllInitials)) {
|
|
162
|
+
score = Math.min(score, 65);
|
|
163
|
+
}
|
|
164
|
+
else if (inputHasFull && givenHasFull && !fullTokenMatchExists) {
|
|
165
|
+
score = Math.min(score, 65);
|
|
166
|
+
}
|
|
167
|
+
let remark;
|
|
168
|
+
if (score === 100)
|
|
169
|
+
remark = "Exact Match";
|
|
170
|
+
else if (score >= 90)
|
|
171
|
+
remark = "High Similarity";
|
|
172
|
+
else if (score >= 70)
|
|
173
|
+
remark = "Possible Match";
|
|
174
|
+
else
|
|
175
|
+
remark = "Low Match";
|
|
176
|
+
return {
|
|
177
|
+
inputName: originalInput,
|
|
178
|
+
givenName: originalGiven,
|
|
179
|
+
percentage: score,
|
|
180
|
+
remark,
|
|
181
|
+
};
|
|
182
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@vspro/name-match-pro",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "A robust name matching utility with initials handling and fuzzy logic useful for fintech and insurance kyc, ocr and crm",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"build": "tsc",
|
|
9
|
+
"example": "ts-node src/examples/example.ts"
|
|
10
|
+
},
|
|
11
|
+
"keywords": ["name-matching", "fuzzy", "initials", "levenshtein", "name-matching-kyc", "name-matching-ocr"],
|
|
12
|
+
"license": "MIT",
|
|
13
|
+
"devDependencies": {
|
|
14
|
+
"ts-node": "^10.9.1",
|
|
15
|
+
"typescript": "^5.3.0"
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { matchNames } from "../utils/nameMatchPro";
|
|
2
|
+
// Sample name pairs: initials-only, one token missing, swapped order, identical.
const examples = [
  { input: "Vikash Yadav Luniwal", given: "Vikash Y L" },
  { input: "Vikash Yadav Luniwal", given: "Vikash Yadav" },
  { input: "Vikash Yadav Luniwal", given: "Vikash Luniwal Yadav" },
  { input: "Vikash Yadav Luniwal", given: "Vikash Yadav Luniwal" },
];

// Print the match result for every pair, in declaration order.
for (const { input, given } of examples) {
  console.log(matchNames(input, given));
}
|
|
13
|
+
|
|
14
|
+
/*
|
|
15
|
+
OUTPUT :
|
|
16
|
+
|
|
17
|
+
{
|
|
18
|
+
inputName: 'Vikash Yadav Luniwal',
|
|
19
|
+
givenName: 'Vikash Y L',
|
|
20
|
+
percentage: 90,
|
|
21
|
+
remark: 'High Similarity'
|
|
22
|
+
}
|
|
23
|
+
{
|
|
24
|
+
inputName: 'Vikash Yadav Luniwal',
|
|
25
|
+
givenName: 'Vikash Yadav',
|
|
26
|
+
percentage: 94,
|
|
27
|
+
remark: 'High Similarity'
|
|
28
|
+
}
|
|
29
|
+
{
|
|
30
|
+
inputName: 'Vikash Yadav Luniwal',
|
|
31
|
+
givenName: 'Vikash Luniwal Yadav',
|
|
32
|
+
percentage: 99,
|
|
33
|
+
remark: 'High Similarity'
|
|
34
|
+
}
|
|
35
|
+
{
|
|
36
|
+
inputName: 'Vikash Yadav Luniwal',
|
|
37
|
+
givenName: 'Vikash Yadav Luniwal',
|
|
38
|
+
percentage: 100,
|
|
39
|
+
remark: 'Exact Match'
|
|
40
|
+
}
|
|
41
|
+
*/
|
package/src/index.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from './utils/nameMatchPro'
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
|
|
2
|
+
/** Honorifics (lower-case, without a trailing period) that are dropped before names are compared. */
const COMMON_PREFIXES: readonly string[] = ["mr", "mrs", "ms", "miss", "shri", "smt", "dr"];

/** Outcome of comparing two names with `matchNames`. */
export interface MatchResult {
  /** First name, exactly as the caller supplied it. */
  inputName: string;
  /** Second name, exactly as the caller supplied it. */
  givenName: string;
  /** Similarity score produced by `matchNames`. */
  percentage: number;
  /** Human-readable label derived from `percentage`. */
  remark: string;
}

/**
 * Canonicalises a raw name so that two spellings of it can be compared.
 *
 * The name is lower-cased, periods are removed outright (so "K.S." becomes
 * "ks"), commas are treated as separators (so "Sharma, Vijay" yields two clean
 * tokens), whitespace runs collapse to one space, and every word listed in
 * `COMMON_PREFIXES` is dropped wherever it appears.
 *
 * @param name - Raw name; `undefined` or an empty string is accepted.
 * @returns The normalised name, or `""` when nothing usable remains.
 */
export function normalizeName(name: string | undefined): string {
  if (!name) return "";

  return name
    .toLowerCase()
    .replace(/\./g, "")
    // A comma only ever separates name parts; leaving it in would glue it to a token.
    .replace(/,/g, " ")
    .split(/\s+/)
    .filter((word) => word !== "" && !COMMON_PREFIXES.includes(word))
    .join(" ");
}
|
|
24
|
+
|
|
25
|
+
/**
 * Collapses a run of dotted initials into one token: "k. s." becomes "ks" and
 * "J.R.R." becomes "JRR".
 *
 * A run is two or more single letters, each followed by a period, optionally
 * separated by whitespace. Letter case is preserved. A lone dotted initial
 * ("k. gowri") is left untouched.
 *
 * @param name - Name text that may contain dotted initials.
 * @returns The text with every run of dotted initials merged.
 */
export function mergeInitials(name: string): string {
  // Match the whole run at once so an odd number of initials leaves no dangling "x.".
  return name.replace(/\b[a-z]\.(?:\s*[a-z]\.)+/gi, (run) => run.replace(/[.\s]/g, ""));
}
|
|
28
|
+
|
|
29
|
+
/**
 * Splits a name into its non-empty words.
 *
 * Any run of whitespace (spaces, tabs, line breaks) counts as one separator,
 * so leading, trailing and repeated whitespace never produce empty tokens.
 *
 * @param name - Name text to split.
 * @returns The words in their original order; empty for a blank name.
 */
export function tokenize(name: string): string[] {
  return name.split(/\s+/).filter(Boolean);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Breaks tokens that look like fused initials into single letters.
 *
 * A token is split when it is 2–4 lower-case ASCII letters and contains none
 * of a, e, i, o, u (for example "ks" becomes "k", "s"). All other tokens pass
 * through unchanged and the relative order is kept.
 *
 * @param tokens - Name tokens.
 * @returns A new array; the argument is not modified.
 */
export function expandCombinedInitials(tokens: string[]): string[] {
  const looksLikeInitials = (candidate: string): boolean =>
    /^[a-z]{2,4}$/.test(candidate) && !/[aeiou]/.test(candidate);

  return tokens.flatMap((candidate) =>
    looksLikeInitials(candidate) ? candidate.split("") : [candidate]
  );
}
|
|
46
|
+
|
|
47
|
+
/**
 * Puts the letters of a fused-initials token into alphabetical order so that
 * "sk" and "ks" compare equal.
 *
 * Only tokens of 2–4 lower-case ASCII letters with none of a, e, i, o, u are
 * reordered; every other token is returned as is.
 *
 * @param token - A single name token.
 * @returns The reordered token, or the token unchanged.
 */
export function normalizeInitialToken(token: string): string {
  const isFusedInitials = /^[a-z]{2,4}$/.test(token) && !/[aeiou]/.test(token);
  if (!isFusedInitials) return token;

  const letters = token.split("");
  letters.sort();
  return letters.join("");
}
|
|
53
|
+
|
|
54
|
+
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The edit distance; 0 when the strings are identical.
 */
export function levenshtein(a: string, b: string): number {
  // previous[col] holds the distance between the first `col` characters of `a`
  // and the prefix of `b` processed so far (initially the empty prefix).
  let previous: number[] = Array.from({ length: a.length + 1 }, (_, col) => col);

  for (let row = 1; row <= b.length; row++) {
    const current: number[] = [row];

    for (let col = 1; col <= a.length; col++) {
      current[col] =
        b[row - 1] === a[col - 1]
          ? previous[col - 1]
          : 1 + Math.min(previous[col - 1], current[col - 1], previous[col]);
    }

    previous = current;
  }

  return previous[a.length];
}
|
|
76
|
+
|
|
77
|
+
/**
 * Tells whether two token lists hold the same tokens in a possibly different
 * order.
 *
 * Each token is first passed through `normalizeInitialToken`, then both lists
 * are sorted and compared. Lists of different length never match.
 *
 * @param tokens1 - Tokens of the first name.
 * @param tokens2 - Tokens of the second name.
 * @returns `true` when the lists are permutations of one another.
 */
export function isSwappedMatch(tokens1: string[], tokens2: string[]): boolean {
  // Order-independent key: normalised tokens, sorted, joined by a space.
  const orderFreeKey = (tokens: string[]): string =>
    tokens
      .map((token) => normalizeInitialToken(token))
      .sort()
      .join(" ");

  return tokens1.length === tokens2.length && orderFreeKey(tokens1) === orderFreeKey(tokens2);
}
|
|
85
|
+
|
|
86
|
+
/**
 * Tells whether one token is a single-letter initial of the other, in either
 * direction ("k" vs "kumar", or "kumar" vs "k").
 *
 * @param token1 - First token.
 * @param token2 - Second token.
 * @returns `true` when either token is one character long and the other starts with it.
 */
export function isInitialMatch(token1: string, token2: string): boolean {
  const abbreviates = (initial: string, full: string): boolean =>
    initial.length === 1 && full.startsWith(initial);

  return abbreviates(token1, token2) || abbreviates(token2, token1);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Scores how closely two names match and labels the result.
 *
 * Both names are normalised and tokenised, then the first applicable rule
 * decides the score:
 *
 * 1. Either name has no usable tokens → 0 (a blank or honorific-only name is
 *    never reported as a match).
 * 2. Normalised names are identical → 100.
 * 3. Same tokens in a different order → 99.
 * 4. Every token of one name appears in the other → 99 / 94 / 90 for a length
 *    difference of 0 / 1 / more, provided at least one multi-letter token is
 *    shared; otherwise `round(shorter / longer * 60)`.
 * 5. Otherwise a fuzzy score: each given token is paired with its most similar
 *    unused input token (prefix overlap, Levenshtein similarity, or initial
 *    match), and the total is weighted 60 % by input coverage and 40 % by
 *    given-name precision. Fuzzy scores of 98 or more are lowered to 95.
 *
 * Finally the score is capped at 65 when one name is all initials and the
 * other has a full word, or when both have full words but share none exactly.
 *
 * @param inputName - First name to compare.
 * @param givenName - Second name to compare.
 * @returns The original names, the score, and its remark: "Exact Match" (100),
 *   "High Similarity" (90–99), "Possible Match" (70–89) or "Low Match" (below 70).
 */
export function matchNames(inputName: string, givenName: string): MatchResult {
  // normalizeName already strips every period, so mergeInitials finds nothing
  // to merge at this point; the call is kept so the pipeline stays unchanged.
  const normalizedInput = mergeInitials(normalizeName(inputName));
  const normalizedGiven = mergeInitials(normalizeName(givenName));

  const inputTokens = expandCombinedInitials(tokenize(normalizedInput));
  const givenTokens = expandCombinedInitials(tokenize(normalizedGiven));

  let score = 0;

  if (inputTokens.length === 0 || givenTokens.length === 0) {
    // Two empty normalised strings are equal, but "nothing" vs "nothing"
    // (e.g. "Mr" vs "Mrs") must not be reported as an exact match.
    score = 0;
  } else if (normalizedInput === normalizedGiven) {
    score = 100;
  } else if (isSwappedMatch(inputTokens, givenTokens)) {
    score = 99;
  } else if (
    givenTokens.every((t) => inputTokens.includes(t)) ||
    inputTokens.every((t) => givenTokens.includes(t))
  ) {
    // One name is fully contained in the other.
    const longer = Math.max(inputTokens.length, givenTokens.length);
    const shorter = Math.min(inputTokens.length, givenTokens.length);
    const diff = longer - shorter;

    const hasFullTokenMatch = givenTokens.some((t) => t.length > 1 && inputTokens.includes(t));

    if (hasFullTokenMatch) {
      if (diff === 0) score = 99;
      else if (diff === 1) score = 94;
      else score = 90;
    } else {
      // Only single-letter tokens overlap: score by the length ratio alone.
      score = Math.round((shorter / longer) * 60);
    }
  } else {
    let matchedScore = 0;
    const usedIndexes = new Set<number>();

    for (const g of givenTokens) {
      let bestMatch = 0;
      let bestIndex = -1;

      for (let idx = 0; idx < inputTokens.length; idx++) {
        // Each input token may be paired with at most one given token.
        if (usedIndexes.has(idx)) continue;

        const candidate = inputTokens[idx];
        const longest = Math.max(candidate.length, g.length);
        const shortest = Math.min(candidate.length, g.length);

        // Prefix overlap gets a 10 % bonus over the plain length ratio.
        const prefixSimilarity =
          candidate.startsWith(g) || g.startsWith(candidate)
            ? Math.min(1, (shortest / longest) * 1.1)
            : 0;
        // Edit-distance similarity gets a 5 % bonus.
        const editSimilarity = Math.min(1, (1 - levenshtein(candidate, g) / longest) * 1.05);
        // An initial standing for a full word counts as a fixed 0.85.
        const initialSimilarity = isInitialMatch(candidate, g) ? 0.85 : 0;

        const similarity = Math.max(prefixSimilarity, editSimilarity, initialSimilarity);
        // Strict comparison keeps the earliest input token on ties.
        if (similarity > bestMatch) {
          bestMatch = similarity;
          bestIndex = idx;
        }
      }

      if (bestIndex !== -1) {
        usedIndexes.add(bestIndex);
        matchedScore += bestMatch;
      }
    }

    const coverage = matchedScore / inputTokens.length;
    const precision = matchedScore / givenTokens.length;
    score = Math.round((coverage * 0.6 + precision * 0.4) * 100);

    // The names are known to differ in this branch, so a near-perfect fuzzy
    // score is lowered to keep it clearly below the exact and swapped scores.
    if (score >= 98) score = 95;
  }

  const inputHasFull = inputTokens.some((t) => t.length > 1);
  const givenHasFull = givenTokens.some((t) => t.length > 1);
  const inputAllInitials = inputTokens.every((t) => t.length === 1);
  const givenAllInitials = givenTokens.every((t) => t.length === 1);
  const fullTokenMatchExists = givenTokens.some((g) => g.length > 1 && inputTokens.includes(g));

  if ((inputHasFull && givenAllInitials) || (givenHasFull && inputAllInitials)) {
    // Initials alone are weak evidence against a full name.
    score = Math.min(score, 65);
  } else if (inputHasFull && givenHasFull && !fullTokenMatchExists) {
    // Both names have full words, yet none of them agrees exactly.
    score = Math.min(score, 65);
  }

  let remark: string;
  if (score === 100) remark = "Exact Match";
  else if (score >= 90) remark = "High Similarity";
  else if (score >= 70) remark = "Possible Match";
  else remark = "Low Match";

  return {
    inputName,
    givenName,
    percentage: score,
    remark,
  };
}
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "CommonJS",
|
|
5
|
+
"declaration": true,
|
|
6
|
+
"outDir": "dist",
|
|
7
|
+
"strict": true,
|
|
8
|
+
"esModuleInterop": true,
|
|
9
|
+
"forceConsistentCasingInFileNames": true,
|
|
10
|
+
"skipLibCheck": true
|
|
11
|
+
},
|
|
12
|
+
"include": ["src"]
|
|
13
|
+
}
|
|
Binary file
|