entity-predictor 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +114 -0
- package/package.json +19 -0
- package/src/index.js +1 -0
- package/src/predictor.js +93 -0
package/README.md
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Entity Predictor
|
|
2
|
+
|
|
3
|
+
A lightweight Node.js library for entity name prediction and normalization, with a single runtime dependency (`string-similarity`). It uses fuzzy matching to identify entities from messy input, supporting aliases, acronyms, and common typos.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Fuzzy Matching**: Matches inputs to entities even with typos or partial names.
|
|
8
|
+
- **Alias Support**: Handles acronyms (e.g., "SBI" -> "STATE BANK OF INDIA") and alternative names.
|
|
9
|
+
- **Confidence Scoring**: Returns a confidence score and a human-readable confidence level ("Trustable", "High Confidence", "Moderate Confidence", or "Low Confidence").
|
|
10
|
+
- **Normalization**: Automatically normalizes input to ignore case and special characters.
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
npm install entity-predictor
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Usage
|
|
19
|
+
|
|
20
|
+
### 1. Import and Initialize
|
|
21
|
+
|
|
22
|
+
You can initialize the predictor with a list of entities. Entities can be simple strings or objects defining aliases.
|
|
23
|
+
|
|
24
|
+
```javascript
|
|
25
|
+
import { EntityPredictor } from "entity-predictor";
|
|
26
|
+
|
|
27
|
+
const entities = [
|
|
28
|
+
// Simple string entity
|
|
29
|
+
"ICICI BANK",
|
|
30
|
+
"AXIS BANK",
|
|
31
|
+
|
|
32
|
+
// Entity with aliases
|
|
33
|
+
{
|
|
34
|
+
name: "STATE BANK OF INDIA",
|
|
35
|
+
aliases: ["SBI", "State Bank", "S.B.I."],
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
name: "HDFC BANK",
|
|
39
|
+
aliases: ["HDFC", "Housing Development Finance Corporation"],
|
|
40
|
+
},
|
|
41
|
+
];
|
|
42
|
+
|
|
43
|
+
const predictor = new EntityPredictor(entities);
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### 2. Predict Entities
|
|
47
|
+
|
|
48
|
+
Use the `predict()` method to find the best match for an input string.
|
|
49
|
+
|
|
50
|
+
```javascript
|
|
51
|
+
const result = predictor.predict("sbi");
|
|
52
|
+
|
|
53
|
+
console.log(result);
|
|
54
|
+
/*
|
|
55
|
+
Output:
|
|
56
|
+
{
|
|
57
|
+
entity: "STATE BANK OF INDIA",
|
|
58
|
+
confidence: 1,
|
|
59
|
+
confidenceLevel: "Trustable"
|
|
60
|
+
}
|
|
61
|
+
*/
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
#### Handling Typos
|
|
65
|
+
|
|
66
|
+
```javascript
|
|
67
|
+
const result = predictor.predict("icici bk");
|
|
68
|
+
|
|
69
|
+
console.log(result);
|
|
70
|
+
/*
|
|
71
|
+
Output:
|
|
72
|
+
{
|
|
73
|
+
entity: "ICICI BANK",
|
|
74
|
+
confidence: 0.714...,
|
|
75
|
+
confidenceLevel: "Moderate Confidence"
|
|
76
|
+
}
|
|
77
|
+
*/
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### 3. Add Entities Dynamically
|
|
81
|
+
|
|
82
|
+
You can add new entities to an existing predictor instance.
|
|
83
|
+
|
|
84
|
+
```javascript
|
|
85
|
+
predictor.addEntity("PUNJAB NATIONAL BANK", ["PNB"]);
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## API Reference
|
|
89
|
+
|
|
90
|
+
### `new EntityPredictor(entities)`
|
|
91
|
+
|
|
92
|
+
- `entities`: Array of strings or objects `{ name: string, aliases: string[] }`.
|
|
93
|
+
|
|
94
|
+
### `predict(input, threshold)`
|
|
95
|
+
|
|
96
|
+
- `input`: String to search for.
|
|
97
|
+
- `threshold`: (Optional) Minimum confidence score to return a match. Default is `0.6`.
|
|
98
|
+
|
|
99
|
+
**Returns:**
|
|
100
|
+
|
|
101
|
+
- `entity`: The canonical name of the matched entity.
|
|
102
|
+
- `confidence`: Score between 0 and 1.
|
|
103
|
+
- `confidenceLevel`:
|
|
104
|
+
- `"Trustable"` (1.0)
|
|
105
|
+
- `"High Confidence"` (>= 0.8)
|
|
106
|
+
- `"Moderate Confidence"` (>= 0.6)
|
|
107
|
+
- `"Low Confidence"` (< 0.6)
|
|
108
|
+
- Returns `null` if the input is invalid.
|
|
109
|
+
- Returns `{ entity: "UNKNOWN", ... }` if no match meets the threshold.
|
|
110
|
+
|
|
111
|
+
### `addEntity(name, aliases)`
|
|
112
|
+
|
|
113
|
+
- `name`: Canonical name of the entity.
|
|
114
|
+
- `aliases`: (Optional) Array of alias strings.
|
package/package.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "entity-predictor",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Lightweight entity name prediction and normalization library",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "src/index.js",
|
|
7
|
+
"keywords": [
|
|
8
|
+
"nlp",
|
|
9
|
+
"entity",
|
|
10
|
+
"prediction",
|
|
11
|
+
"nodejs"
|
|
12
|
+
],
|
|
13
|
+
"author": "Sahil",
|
|
14
|
+
"email": "dev.sahilkumar02@gmail.com",
|
|
15
|
+
"license": "MIT",
|
|
16
|
+
"dependencies": {
|
|
17
|
+
"string-similarity": "^4.0.4"
|
|
18
|
+
}
|
|
19
|
+
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { EntityPredictor } from "./predictor.js";
|
package/src/predictor.js
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import stringSimilarity from "string-similarity";
|
|
2
|
+
|
|
3
|
+
/**
 * Reduce free-form text to a canonical key used for fuzzy comparison.
 *
 * The text is lower-cased and every character that is not a letter or a
 * digit (whitespace, punctuation, symbols) is removed, so "S.B.I." and
 * "sbi" produce the same key.
 *
 * Digits and non-ASCII letters are deliberately kept: dropping them would
 * make distinct names such as "Branch 1" / "Branch 2" indistinguishable and
 * would reduce purely numeric or non-Latin names to an empty key.
 *
 * @param {string} text - Raw text; `null`/`undefined` are treated as "".
 * @returns {string} The normalized key (possibly an empty string).
 */
function normalize(text) {
  // Coerce defensively so a stray non-string value yields a key instead of
  // an opaque "toLowerCase is not a function" error.
  return String(text ?? "")
    .toLowerCase()
    // \p{L} = any Unicode letter, \p{N} = any Unicode number.
    .replace(/[^\p{L}\p{N}]/gu, "");
}
|
|
9
|
+
|
|
10
|
+
/**
 * Fuzzy matcher that resolves messy input text to a known canonical entity
 * name.
 *
 * Every canonical name and alias is normalized (via `normalize`) and stored
 * in `searchCandidates`. `candidateToEntity` is a parallel array: the value
 * at index `i` is the canonical name that `searchCandidates[i]` resolves to.
 * `entities` lists the canonical names in registration order.
 */
export class EntityPredictor {
  /**
   * @param {Array<string | {name: string, aliases?: string[]}>} [entities=[]]
   *   Initial entities. A plain string is a canonical name without aliases;
   *   an object supplies a canonical `name` plus optional `aliases`.
   *   Entries of any other shape (including `null`) are skipped.
   */
  constructor(entities = []) {
    this.entities = [];
    this.searchCandidates = [];
    this.candidateToEntity = [];

    entities.forEach((item) => {
      if (typeof item === "string") {
        this.addEntity(item);
        return;
      }

      // `typeof null` is "object", so null must be excluded explicitly
      // before `item.name` is read.
      const isNamedObject =
        item !== null &&
        typeof item === "object" &&
        typeof item.name === "string" &&
        item.name !== "";

      if (isNamedObject) {
        this.addEntity(item.name, item.aliases);
      }
      // Anything else is an invalid entry and is skipped.
    });
  }

  /**
   * Find the registered entity that best matches `input`.
   *
   * @param {string} input - Text to resolve.
   * @param {number} [threshold=0.6] - Minimum similarity rating required to
   *   report a match.
   * @returns {{entity: string, confidence: number, confidenceLevel: string} | null}
   *   `null` when `input` is empty or not a string. Otherwise the best
   *   match; `entity` is `"UNKNOWN"` when nothing reaches `threshold`.
   */
  predict(input, threshold = 0.6) {
    if (!input || typeof input !== "string") {
      return null;
    }

    const query = normalize(input);

    // Nothing to compare: either the input has no comparable characters or
    // no candidates are registered. string-similarity's findBestMatch
    // rejects an empty target list, so it must not be called in that case.
    if (query === "" || this.searchCandidates.length === 0) {
      return {
        entity: "UNKNOWN",
        confidence: 0,
        confidenceLevel: "Low Confidence",
      };
    }

    const { bestMatch, bestMatchIndex } = stringSimilarity.findBestMatch(
      query,
      this.searchCandidates
    );
    const rating = bestMatch.rating;

    let confidenceLevel = "Low Confidence";
    if (rating === 1) {
      confidenceLevel = "Trustable";
    } else if (rating >= 0.8) {
      confidenceLevel = "High Confidence";
    } else if (rating >= 0.6) {
      confidenceLevel = "Moderate Confidence";
    }

    return {
      entity:
        rating >= threshold ? this.candidateToEntity[bestMatchIndex] : "UNKNOWN",
      confidence: rating,
      confidenceLevel,
    };
  }

  /**
   * Register an entity and its aliases on this predictor.
   *
   * @param {string} entity - Canonical name returned by `predict`.
   * @param {string[]} [aliases=[]] - Alternative names that resolve to
   *   `entity`. A non-array value is ignored, as are non-string elements.
   * @throws {TypeError} If `entity` is not a string. Nothing is modified in
   *   that case.
   */
  addEntity(entity, aliases = []) {
    // Validate before touching any state so a bad call cannot leave the
    // three arrays out of sync.
    if (typeof entity !== "string") {
      throw new TypeError("entity name must be a string");
    }

    this.entities.push(entity);

    const labels = Array.isArray(aliases) ? [entity, ...aliases] : [entity];
    for (const label of labels) {
      if (typeof label !== "string") {
        continue;
      }
      const candidate = normalize(label);
      // An empty candidate carries no information and would compare equal
      // to any input that also normalizes to "", producing a bogus perfect
      // match, so it is not indexed.
      if (candidate === "") {
        continue;
      }
      this.searchCandidates.push(candidate);
      this.candidateToEntity.push(entity);
    }
  }
}
|