entity-predictor 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,114 @@
1
+ # Entity Predictor
2
+
3
+ A lightweight Node.js library for entity name prediction and normalization, with a single runtime dependency (`string-similarity`). It uses fuzzy matching to identify entities from messy input, supporting aliases, acronyms, and common typos.
4
+
5
+ ## Features
6
+
7
+ - **Fuzzy Matching**: Matches inputs to entities even with typos or partial names.
8
+ - **Alias Support**: Handles acronyms (e.g., "SBI" -> "STATE BANK OF INDIA") and alternative names.
9
+ - **Confidence Scoring**: Returns a confidence score and a human-readable trust level ("Trustable", "High Confidence", "Moderate Confidence", or "Low Confidence").
10
+ - **Normalization**: Automatically lowercases input and strips every character other than the letters a–z (whitespace, punctuation, and digits are ignored) before matching.
11
+
12
+ ## Installation
13
+
14
+ ```bash
15
+ npm install entity-predictor
16
+ ```
17
+
18
+ ## Usage
19
+
20
+ ### 1. Import and Initialize
21
+
22
+ You can initialize the predictor with a list of entities. Entities can be simple strings or objects defining aliases.
23
+
24
+ ```javascript
25
+ import { EntityPredictor } from "entity-predictor";
26
+
27
+ const entities = [
28
+ // Simple string entity
29
+ "ICICI BANK",
30
+ "AXIS BANK",
31
+
32
+ // Entity with aliases
33
+ {
34
+ name: "STATE BANK OF INDIA",
35
+ aliases: ["SBI", "State Bank", "S.B.I."],
36
+ },
37
+ {
38
+ name: "HDFC BANK",
39
+ aliases: ["HDFC", "Housing Development Finance Corporation"],
40
+ },
41
+ ];
42
+
43
+ const predictor = new EntityPredictor(entities);
44
+ ```
45
+
46
+ ### 2. Predict Entities
47
+
48
+ Use the `predict()` method to find the best match for an input string.
49
+
50
+ ```javascript
51
+ const result = predictor.predict("sbi");
52
+
53
+ console.log(result);
54
+ /*
55
+ Output:
56
+ {
57
+ entity: "STATE BANK OF INDIA",
58
+ confidence: 1,
59
+ confidenceLevel: "Trustable"
60
+ }
61
+ */
62
+ ```
63
+
64
+ #### Handling Typos
65
+
66
+ ```javascript
67
+ const result = predictor.predict("icici bk");
68
+
69
+ console.log(result);
70
+ /*
71
+ Output:
72
+ {
73
+ entity: "ICICI BANK",
74
+ confidence: 0.714...,
75
+ confidenceLevel: "Moderate Confidence"
76
+ }
77
+ */
78
+ ```
79
+
80
+ ### 3. Add Entities Dynamically
81
+
82
+ You can add new entities to an existing predictor instance.
83
+
84
+ ```javascript
85
+ predictor.addEntity("PUNJAB NATIONAL BANK", ["PNB"]);
86
+ ```
87
+
88
+ ## API Reference
89
+
90
+ ### `new EntityPredictor(entities)`
91
+
92
+ - `entities`: Array of strings or objects `{ name: string, aliases: string[] }`.
93
+
94
+ ### `predict(input, threshold)`
95
+
96
+ - `input`: String to search for.
97
+ - `threshold`: (Optional) Minimum confidence score to return a match. Default is `0.6`.
98
+
99
+ **Returns:**
100
+
101
+ - `entity`: The canonical name of the matched entity.
102
+ - `confidence`: Score between 0 and 1.
103
+ - `confidenceLevel`:
104
+ - `"Trustable"` (1.0)
105
+ - `"High Confidence"` (>= 0.8)
106
+ - `"Moderate Confidence"` (>= 0.6)
107
+ - `"Low Confidence"` (< 0.6)
108
+ - Returns `null` if `input` is empty or not a string.
109
+ - Returns `{ entity: "UNKNOWN", ... }` if no match meets the threshold.
110
+
111
+ ### `addEntity(name, aliases)`
112
+
113
+ - `name`: Canonical name of the entity.
114
+ - `aliases`: (Optional) Array of alias strings.
package/package.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "entity-predictor",
3
+ "version": "1.0.0",
4
+ "description": "Lightweight entity name prediction and normalization library",
5
+ "type": "module",
6
+ "main": "src/index.js",
7
+ "keywords": [
8
+ "nlp",
9
+ "entity",
10
+ "prediction",
11
+ "nodejs"
12
+ ],
13
+ "author": "Sahil",
14
+ "email": "dev.sahilkumar02@gmail.com",
15
+ "license": "MIT",
16
+ "dependencies": {
17
+ "string-similarity": "^4.0.4"
18
+ }
19
+ }
package/src/index.js ADDED
@@ -0,0 +1 @@
1
+ export { EntityPredictor } from "./predictor.js";
@@ -0,0 +1,93 @@
1
+ import stringSimilarity from "string-similarity";
2
+
3
/**
 * Reduce free-form text to a canonical comparison key.
 *
 * The text is Unicode-decomposed (NFKD), lowercased, and stripped of every
 * character other than the ASCII letters a-z. Decomposing first means
 * accented and full-width Latin letters fold to their base letter
 * ("Société" -> "societe") instead of being deleted outright.
 * Whitespace, punctuation and digits are all removed, so "S.B.I." and
 * "s b i" both become "sbi".
 *
 * @param {string} text - Raw entity name, alias, or user input.
 * @returns {string} The normalized key; may be empty when the text contains
 *   no Latin letters.
 * @throws {TypeError} If `text` is not a string (no coercion is attempted).
 */
function normalize(text) {
  return text
    // Split letters from their combining marks / compatibility forms so the
    // filter below drops only the marks, not the letters themselves.
    .normalize("NFKD")
    .toLowerCase()
    // No trim needed afterwards: whitespace is already outside a-z.
    .replace(/[^a-z]/g, "");
}
9
+
10
/**
 * Fuzzy entity-name matcher.
 *
 * Every canonical name and alias is stored in normalized form in
 * `searchCandidates`; `candidateToEntity` is a parallel array mapping each
 * candidate index back to the canonical entity name it belongs to.
 */
export class EntityPredictor {
  /**
   * @param {Array<string | {name: string, aliases?: string[]}>} [entities]
   *   Initial entities. Entries that are neither a string nor an object with
   *   a non-empty string `name` are skipped.
   * @throws {TypeError} If an entry's `aliases` array contains a non-string.
   */
  constructor(entities = []) {
    this.entities = [];
    this.searchCandidates = [];
    this.candidateToEntity = [];

    entities.forEach((item) => {
      if (typeof item === "string") {
        this.addEntity(item);
        return;
      }

      // `typeof null` is "object", so null has to be excluded explicitly
      // before touching `item.name`.
      const isNamedObject =
        item !== null &&
        typeof item === "object" &&
        typeof item.name === "string" &&
        item.name !== "";

      if (isNamedObject) {
        this.addEntity(
          item.name,
          Array.isArray(item.aliases) ? item.aliases : []
        );
      }
      // Anything else is an invalid entry and is skipped.
    });
  }

  /**
   * Find the entity whose name or alias best matches `input`.
   *
   * @param {string} input - Text to look up.
   * @param {number} [threshold=0.6] - Minimum rating required to report a
   *   match.
   * @returns {{entity: string, confidence: number, confidenceLevel: string} | null}
   *   `null` when `input` is empty or not a string. Otherwise an object whose
   *   `entity` is the canonical name, or "UNKNOWN" when the best rating is
   *   below `threshold`, there are no candidates, or the input contains
   *   nothing matchable after normalization.
   */
  predict(input, threshold = 0.6) {
    if (!input || typeof input !== "string") {
      return null;
    }

    const query = normalize(input);

    // findBestMatch throws on an empty candidate list, and an empty query
    // would rate 1.0 against any candidate that also normalized to "".
    // Neither situation is a real match.
    if (query === "" || this.searchCandidates.length === 0) {
      return {
        entity: "UNKNOWN",
        confidence: 0,
        confidenceLevel: "Low Confidence",
      };
    }

    const match = stringSimilarity.findBestMatch(query, this.searchCandidates);
    const rating = match.bestMatch.rating;

    let confidenceLevel = "Low Confidence";
    if (rating === 1) {
      confidenceLevel = "Trustable";
    } else if (rating >= 0.8) {
      confidenceLevel = "High Confidence";
    } else if (rating >= 0.6) {
      confidenceLevel = "Moderate Confidence";
    }

    return {
      entity:
        rating >= threshold
          ? this.candidateToEntity[match.bestMatchIndex]
          : "UNKNOWN",
      confidence: rating,
      confidenceLevel,
    };
  }

  /**
   * Register an entity and its aliases as match candidates.
   *
   * All arguments are validated and normalized before any internal array is
   * touched, so a rejected call leaves the predictor unchanged.
   *
   * @param {string} entity - Canonical name of the entity.
   * @param {string[]} [aliases=[]] - Alternative names resolving to `entity`.
   * @throws {TypeError} If `entity` is not a string or `aliases` is not an
   *   array of strings.
   */
  addEntity(entity, aliases = []) {
    if (typeof entity !== "string") {
      throw new TypeError("entity name must be a string");
    }
    if (
      !Array.isArray(aliases) ||
      aliases.some((alias) => typeof alias !== "string")
    ) {
      throw new TypeError("aliases must be an array of strings");
    }

    // Canonical name first, then aliases, all pointing back at `entity`.
    const candidates = [entity, ...aliases].map((label) => normalize(label));

    this.entities.push(entity);
    candidates.forEach((candidate) => {
      this.searchCandidates.push(candidate);
      this.candidateToEntity.push(entity);
    });
  }
}