npm - nlptoolkit-namedentityrecognition - Versions diffs - 1.0.0 - Mend

nlptoolkit-namedentityrecognition 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

package/README.md +100 -0
package/dist/AutoNER.d.ts +10 -0
package/dist/AutoNER.js +26 -0
package/dist/AutoNER.js.map +1 -0
package/dist/Gazetteer.d.ts +23 -0
package/dist/Gazetteer.js +50 -0
package/dist/Gazetteer.js.map +1 -0
package/dist/NERCorpus.d.ts +16 -0
package/dist/NERCorpus.js +43 -0
package/dist/NERCorpus.js.map +1 -0
package/dist/NamedEntitySentence.d.ts +9 -0
package/dist/NamedEntitySentence.js +73 -0
package/dist/NamedEntitySentence.js.map +1 -0
package/dist/NamedEntityType.d.ts +8 -0
package/dist/NamedEntityType.js +23 -0
package/dist/NamedEntityType.js.map +1 -0
package/dist/NamedEntityTypeStatic.d.ts +15 -0
package/dist/NamedEntityTypeStatic.js +62 -0
package/dist/NamedEntityTypeStatic.js.map +1 -0
package/dist/NamedEntityWord.d.ts +17 -0
package/dist/NamedEntityWord.js +35 -0
package/dist/NamedEntityWord.js.map +1 -0
package/dist/Slot.d.ts +9 -0
package/dist/Slot.js +60 -0
package/dist/Slot.js.map +1 -0
package/dist/SlotType.d.ts +5 -0
package/dist/SlotType.js +20 -0
package/dist/SlotType.js.map +1 -0
package/gazetteer-location.txt +275 -0
package/gazetteer-organization.txt +1181 -0
package/gazetteer-person.txt +819 -0
package/index.js +9 -0
package/nerdata.txt +27556 -0
package/package.json +29 -0
package/source/AutoNER.ts +17 -0
package/source/Gazetteer.ts +41 -0
package/source/NERCorpus.ts +32 -0
package/source/NamedEntitySentence.ts +58 -0
package/source/NamedEntityType.ts +3 -0
package/source/NamedEntityTypeStatic.ts +50 -0
package/source/NamedEntityWord.ts +26 -0
package/source/Slot.ts +52 -0
package/source/SlotType.ts +3 -0
package/source/tsconfig.json +13 -0
package/tests/GazetteerTest.ts +24 -0
package/tests/NERCorpusTest.ts +28 -0
package/tests/NamedEntityTypeTest.ts +13 -0
package/tests/SlotTest.ts +21 -0
package/tsconfig.json +15 -0

package/package.json ADDED Viewed

@@ -0,0 +1,29 @@
+{
+  "name": "nlptoolkit-namedentityrecognition",
+  "version": "1.0.0",
+  "description": "NER Corpus Processing Library",
+  "main": "index.js",
+  "types": "index.js",
+  "scripts": {
+    "test": "mocha"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/StarlangSoftware/TurkishNamedEntityRecognition-Js.git"
+  },
+  "author": "Olcay Taner Yıldız",
+  "license": "ISC",
+  "bugs": {
+    "url": "https://github.com/StarlangSoftware/TurkishNamedEntityRecognition-Js/issues"
+  },
+  "homepage": "https://github.com/StarlangSoftware/TurkishNamedEntityRecognition-Js#readme",
+  "devDependencies": {
+    "@types/mocha": "^9.0.0",
+    "mocha": "^9.1.3",
+    "nlptoolkit-corpus": "^1.0.0",
+    "nlptoolkit-datastructure": "^1.0.0",
+    "nlptoolkit-dictionary": "^1.0.1",
+    "ts-node": "^10.4.0",
+    "typescript": "^4.5.2"
+  }
+}

package/source/AutoNER.ts ADDED Viewed

@@ -0,0 +1,17 @@
+import {Gazetteer} from "./Gazetteer";
+export abstract class AutoNER {
+    /** Gazetteer named "PERSON", read from the file "gazetteer-person.txt". */
+    protected personGazetteer: Gazetteer = new Gazetteer("PERSON", "gazetteer-person.txt")
+    /** Gazetteer named "ORGANIZATION", read from the file "gazetteer-organization.txt". */
+    protected organizationGazetteer: Gazetteer = new Gazetteer("ORGANIZATION", "gazetteer-organization.txt")
+    /** Gazetteer named "LOCATION", read from the file "gazetteer-location.txt". */
+    protected locationGazetteer: Gazetteer = new Gazetteer("LOCATION", "gazetteer-location.txt")
+    /**
+     * Base constructor for automatic Named Entity Recognition. The person, organization and location
+     * gazetteers are created by the field initializers above, in that order, whenever an instance is
+     * constructed. The file names are relative paths handed to {@link Gazetteer} unchanged.
+     */
+    constructor() {
+    }
+}

package/source/Gazetteer.ts ADDED Viewed

@@ -0,0 +1,41 @@
+import * as fs from "fs";
+export class Gazetteer {
+    /** Gazetteer entries, stored lower-cased with the Turkish ("tr") locale rules. */
+    private data: Set<string> = new Set<string>()
+    /** Name given to this gazetteer at construction time. */
+    private name: string
+    /**
+     * A constructor for a specific gazetteer. Reads the whole file synchronously as UTF-8 and stores one
+     * entry per non-empty line, lower-cased with the Turkish locale.
+     * @param name Name of the gazetteer. This name is used to tell different gazetteers apart.
+     * @param fileName File name of the gazetteer data, one entry per line.
+     */
+    constructor(name: string, fileName: string) {
+        this.name = name
+        const content = fs.readFileSync(fileName, 'utf8')
+        // Accept both LF and CRLF line endings so no entry keeps a trailing "\r".
+        for (const line of content.split(/\r?\n/)) {
+            // A blank or trailing line must not register the empty string as an entry.
+            if (line !== "") {
+                this.data.add(line.toLocaleLowerCase("tr"))
+            }
+        }
+    }
+    /**
+     * Accessor method for the name of the gazetteer.
+     * @return Name of the gazetteer.
+     */
+    getName(): string{
+        return this.name
+    }
+    /**
+     * Checks whether the given word exists in the gazetteer. The word is lower-cased with the Turkish
+     * locale before the lookup, the same way the entries were when they were loaded.
+     * @param word Word to be searched in the gazetteer.
+     * @return True if the word is in the gazetteer, false otherwise.
+     */
+    contains(word: string):boolean{
+        return this.data.has(word.toLocaleLowerCase("tr"))
+    }
+}

package/source/NERCorpus.ts ADDED Viewed

@@ -0,0 +1,32 @@
+import {Corpus} from "nlptoolkit-corpus/dist/Corpus";
+import * as fs from "fs";
+import {NamedEntitySentence} from "./NamedEntitySentence";
+import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
+export class NERCorpus extends Corpus{
+    /**
+     * Constructor of {@link NERCorpus}. Without a file name the corpus starts empty. With a file name the
+     * file is read synchronously as UTF-8 and every line, including empty ones, is parsed into a
+     * {@link NamedEntitySentence} and added to the corpus.
+     *
+     * @param fileName Name of the corpus file (optional).
+     */
+    constructor(fileName?: string) {
+        super();
+        if (fileName != undefined){
+            const content = fs.readFileSync(fileName, 'utf8')
+            // Split on LF or CRLF: with a plain "\n" split a CRLF file would leave "\r" glued to the last
+            // token of every line, corrupting that word and hiding a closing "e_enamex>" tag.
+            for (const line of content.split(/\r?\n/)) {
+                this.addSentence(new NamedEntitySentence(line))
+            }
+        }
+    }
+    /**
+     * addSentence appends a new sentence to the sentences {@link Array}.
+     * @param s Sentence to be added.
+     */
+    addSentence(s: Sentence) {
+        this.sentences.push(s)
+    }
+}

package/source/NamedEntitySentence.ts ADDED Viewed

@@ -0,0 +1,58 @@
+import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
+import {NamedEntityType} from "./NamedEntityType";
+import {Word} from "nlptoolkit-dictionary/dist/Dictionary/Word";
+import {NamedEntityTypeStatic} from "./NamedEntityTypeStatic";
+import {NamedEntityWord} from "./NamedEntityWord";
+export class NamedEntitySentence extends Sentence{
+    /**
+     * Constructor of {@link NamedEntitySentence}. Given a named entity annotated sentence in string form,
+     * it splits the text on single spaces and appends one {@link NamedEntityWord} per non-empty word.
+     * A token equal to "<b_enamex" is skipped; a token starting with TYPE=" sets the entity type used for
+     * the following words; a token ending with "e_enamex>" resets the type to NONE after its word is added.
+     * @param sentence Named Entity annotated sentence in string form (optional; nothing is parsed without it)
+     */
+    constructor(sentence?: string) {
+        super();
+        if (sentence == undefined){
+            return;
+        }
+        let currentType = NamedEntityType.NONE;
+        for (const token of sentence.split(" ")){
+            // Consecutive spaces yield empty tokens; the opening tag marker carries no word either.
+            if (token == "" || token == "<b_enamex"){
+                continue;
+            }
+            const closesEntity = token.endsWith("e_enamex>");
+            let text: string;
+            if (token.startsWith("TYPE=\"")){
+                // The type name sits between the opening quote (6 characters in) and the next quote.
+                const quoteEnd = token.indexOf('\"', 6);
+                if (quoteEnd != -1){
+                    currentType = NamedEntityTypeStatic.getNamedEntityType(token.substring(6, quoteEnd));
+                }
+                // The word itself follows the first '>' and, if the entity closes here, stops at the first '<'.
+                const textStart = token.indexOf('>') + 1;
+                if (closesEntity){
+                    text = token.substring(textStart, token.indexOf('<'));
+                } else {
+                    text = token.substring(textStart);
+                }
+            } else {
+                if (closesEntity){
+                    text = token.substring(0, token.indexOf('<'));
+                } else {
+                    text = token;
+                }
+            }
+            if (text != ""){
+                this.words.push(new NamedEntityWord(text, currentType));
+            }
+            if (closesEntity){
+                currentType = NamedEntityType.NONE;
+            }
+        }
+    }
+}

package/source/NamedEntityType.ts ADDED Viewed

@@ -0,0 +1,3 @@
+/** Entity categories a word can be tagged with; NONE marks a word that is not part of any entity. */
+export enum NamedEntityType {
+    NONE = 0,
+    PERSON = 1,
+    ORGANIZATION = 2,
+    LOCATION = 3,
+    TIME = 4,
+    MONEY = 5
+}

package/source/NamedEntityTypeStatic.ts ADDED Viewed

@@ -0,0 +1,50 @@
+import {NamedEntityType} from "./NamedEntityType";
+export class NamedEntityTypeStatic {
+    /**
+     * Static function to convert an entity type name to its {@link NamedEntityType}. The name is
+     * upper-cased before comparison, so the match is case-insensitive.
+     * @param entityType Entity type in string form
+     * @return Entity type in {@link NamedEntityType} form; NONE when the name is not recognized
+     */
+    static getNamedEntityType(entityType: string): NamedEntityType{
+        const upper = entityType.toUpperCase();
+        if (upper === "PERSON"){
+            return NamedEntityType.PERSON;
+        }
+        if (upper === "LOCATION"){
+            return NamedEntityType.LOCATION;
+        }
+        if (upper === "ORGANIZATION"){
+            return NamedEntityType.ORGANIZATION;
+        }
+        if (upper === "TIME"){
+            return NamedEntityType.TIME;
+        }
+        if (upper === "MONEY"){
+            return NamedEntityType.MONEY;
+        }
+        return NamedEntityType.NONE;
+    }
+    /**
+     * Static function to convert a {@link NamedEntityType} to string form.
+     * @param entityType Entity type in {@link NamedEntityType} form
+     * @return Entity type in string form; undefined when no type is given, "NONE" for NONE and for any
+     * value that is not one of the listed types
+     */
+    static getNamedEntity(entityType: NamedEntityType): string{
+        if (entityType == undefined){
+            return undefined;
+        }
+        const labels: [NamedEntityType, string][] = [
+            [NamedEntityType.PERSON, "PERSON"],
+            [NamedEntityType.LOCATION, "LOCATION"],
+            [NamedEntityType.ORGANIZATION, "ORGANIZATION"],
+            [NamedEntityType.TIME, "TIME"],
+            [NamedEntityType.MONEY, "MONEY"]
+        ];
+        for (const [candidate, label] of labels){
+            if (entityType === candidate){
+                return label;
+            }
+        }
+        return "NONE";
+    }
+}

package/source/NamedEntityWord.ts ADDED Viewed

@@ -0,0 +1,26 @@
+import {Word} from "nlptoolkit-dictionary/dist/Dictionary/Word";
+import {NamedEntityType} from "./NamedEntityType";
+export class NamedEntityWord extends Word{
+    /**
+     * A constructor of {@link NamedEntityWord}. The name is passed on to the {@link Word} constructor and
+     * the entity type is kept in the private namedEntityType field declared by the parameter property.
+     * @param name Name of the word
+     * @param namedEntityType {@link NamedEntityType} of the word
+     */
+    constructor(name: string, private namedEntityType: NamedEntityType) {
+        super(name);
+    }
+    /**
+     * Accessor method for the namedEntityType attribute.
+     *
+     * @return namedEntityType of the word.
+     */
+    getNamedEntityType(): NamedEntityType{
+        return this.namedEntityType
+    }
+}

package/source/Slot.ts ADDED Viewed

@@ -0,0 +1,52 @@
+import {SlotType} from "./SlotType";
+export class Slot {
+    /** Position marker of the slot: B, I or O. */
+    private type: SlotType
+    /** Tag that follows the "B-" / "I-" prefix; undefined for a slot parsed from "O". */
+    private tag: string
+    /**
+     * Creates a slot either from its parts or from its string form.
+     * <ul>
+     * <li>(type, tag): stores both values as given.</li>
+     * <li>(type): a {@link SlotType} on its own is stored with an undefined tag, e.g. new Slot(SlotType.O).</li>
+     * <li>(text): parses "O", "B-tag" or "I-tag". Everything after the first "-" becomes the tag. A prefix
+     * other than "B" or "I" leaves the type unset.</li>
+     * </ul>
+     * @param typeOrSlot A {@link SlotType}, or the string form of a slot.
+     * @param tag Tag of the slot when the first argument is a {@link SlotType}.
+     */
+    constructor(typeOrSlot: any, tag?: string) {
+        // SlotType is a numeric enum, so a number is always a type and never text to parse. Without this
+        // check a type passed without a tag reached the string branch and failed on substring().
+        if (tag != undefined || typeof typeOrSlot === "number"){
+            this.type = typeOrSlot
+            this.tag = tag
+        } else {
+            if (typeOrSlot == "O"){
+                this.type = SlotType.O;
+                this.tag = undefined;
+            } else {
+                const dash = typeOrSlot.indexOf("-");
+                const prefix = typeOrSlot.substring(0, dash);
+                switch (prefix){
+                    case "B":
+                        this.type = SlotType.B;
+                        break;
+                    case "I":
+                        this.type = SlotType.I;
+                        break;
+                }
+                this.tag = typeOrSlot.substring(dash + 1);
+            }
+        }
+    }
+    /**
+     * Accessor method for the type of the slot.
+     * @return Type of the slot.
+     */
+    getType(): SlotType{
+        return this.type
+    }
+    /**
+     * Accessor method for the tag of the slot.
+     * @return Tag of the slot.
+     */
+    getTag(): string{
+        return this.tag
+    }
+    /**
+     * Converts the slot back to its string form.
+     * @return "O" for an O slot, "B-" or "I-" followed by the tag for B and I slots, and the empty string
+     * when the type is none of these.
+     */
+    toString(): string{
+        switch (this.type){
+            case SlotType.O:
+                return "O";
+            case SlotType.B:
+                return "B-" + this.tag;
+            case SlotType.I:
+                return "I-" + this.tag;
+        }
+        return "";
+    }
+}

package/source/SlotType.ts ADDED Viewed

@@ -0,0 +1,3 @@
+/** Position markers of a slot tag: B, I and O, as used in the "B-tag" / "I-tag" / "O" string forms. */
+export enum SlotType {
+    B = 0,
+    I = 1,
+    O = 2
+}

package/source/tsconfig.json ADDED Viewed

@@ -0,0 +1,13 @@
+{
+  "compilerOptions": {
+    "outDir": "../dist",
+    "module": "umd",
+    "target": "es6",
+    "sourceMap": true,
+    "noImplicitAny": true,
+    "strictNullChecks": false,
+    "removeComments": false,
+    "moduleResolution": "node",
+    "declaration": true
+  }
+}

package/tests/GazetteerTest.ts ADDED Viewed

@@ -0,0 +1,24 @@
+import * as assert from "assert";
+import {Gazetteer} from "../dist/Gazetteer";
+describe('GazetteerTest', function() {
+    describe('GazetteerTest', function() {
+        it('testContains', function() {
+            const gazetteer = new Gazetteer("location", "gazetteer-location.txt");
+            // Each location is looked up in lower case and then in upper case.
+            const entries = [
+                "bağdat", "BAĞDAT",
+                "belçika", "BELÇİKA",
+                "körfez", "KÖRFEZ",
+                "küba", "KÜBA",
+                "varşova", "VARŞOVA",
+                "krallık", "KRALLIK",
+                "berlin", "BERLİN"
+            ];
+            for (const entry of entries){
+                assert.ok(gazetteer.contains(entry));
+            }
+        });
+    });
+});

package/tests/NERCorpusTest.ts ADDED Viewed

@@ -0,0 +1,28 @@
+import * as assert from "assert";
+import {CounterHashMap} from "nlptoolkit-datastructure/dist/CounterHashMap";
+import {NERCorpus} from "../dist/NERCorpus";
+import {NamedEntitySentence} from "../dist/NamedEntitySentence";
+import {NamedEntityWord} from "../dist/NamedEntityWord";
+import {NamedEntityType} from "../dist/NamedEntityType";
+describe('NERCorpusTest', function() {
+    describe('NERCorpusTest', function() {
+        it('testNERCorpus', function() {
+            // assert.strictEqual takes (actual, expected); keeping that order makes a failure report
+            // label the two values correctly.
+            const counter = new CounterHashMap<NamedEntityType>();
+            const nerCorpus = new NERCorpus("nerdata.txt");
+            assert.strictEqual(nerCorpus.sentenceCount(), 27556);
+            assert.strictEqual(nerCorpus.numberOfWords(), 492233);
+            // Count how many words carry each named entity type across the whole corpus.
+            for (let i = 0; i < nerCorpus.sentenceCount(); i++){
+                const namedEntitySentence = <NamedEntitySentence>nerCorpus.getSentence(i);
+                for (let j = 0; j < namedEntitySentence.wordCount(); j++){
+                    const namedEntityWord = <NamedEntityWord> namedEntitySentence.getWord(j);
+                    counter.put(namedEntityWord.getNamedEntityType());
+                }
+            }
+            assert.strictEqual(counter.get(NamedEntityType.NONE), 438976);
+            assert.strictEqual(counter.get(NamedEntityType.PERSON), 23878);
+            assert.strictEqual(counter.get(NamedEntityType.ORGANIZATION), 16931);
+            assert.strictEqual(counter.get(NamedEntityType.LOCATION), 12448);
+        });
+    });
+});

package/tests/NamedEntityTypeTest.ts ADDED Viewed

@@ -0,0 +1,13 @@
+import * as assert from "assert";
+import {NamedEntityTypeStatic} from "../source/NamedEntityTypeStatic";
+import {NamedEntityType} from "../source/NamedEntityType";
+describe('NamedEntityTypeTest', function() {
+    describe('NamedEntityTypeTest', function() {
+        it('testNamedEntityType', function() {
+            // Input names in mixed case, paired with the entity type each must parse to.
+            const expectations: [string, NamedEntityType][] = [
+                ["person", NamedEntityType.PERSON],
+                ["Time", NamedEntityType.TIME],
+                ["location", NamedEntityType.LOCATION]
+            ];
+            for (const [text, expected] of expectations){
+                assert.strictEqual(NamedEntityTypeStatic.getNamedEntityType(text), expected);
+            }
+        });
+    });
+});

package/tests/SlotTest.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import * as assert from "assert";
+import {Slot} from "../dist/Slot";
+import {SlotType} from "../dist/SlotType";
+describe('SlotTest', function() {
+    describe('SlotTest', function() {
+        it('testSlot', function() {
+            // assert.strictEqual takes (actual, expected); keeping that order makes a failure report
+            // label the two values correctly.
+            const slot1 = new Slot("B-depart_date.month_name");
+            assert.strictEqual(slot1.getType(), SlotType.B);
+            assert.strictEqual(slot1.getTag(), "depart_date.month_name");
+            assert.strictEqual(slot1.toString(), "B-depart_date.month_name");
+            const slot2 = new Slot("O");
+            assert.strictEqual(slot2.getType(), SlotType.O);
+            // A slot parsed from "O" carries no tag.
+            assert.strictEqual(slot2.getTag(), undefined);
+            assert.strictEqual(slot2.toString(), "O");
+            const slot3 = new Slot("I-round_trip");
+            assert.strictEqual(slot3.getType(), SlotType.I);
+            assert.strictEqual(slot3.getTag(), "round_trip");
+            assert.strictEqual(slot3.toString(), "I-round_trip");
+        });
+    });
+});

package/tsconfig.json ADDED Viewed

@@ -0,0 +1,15 @@
+{
+  "compilerOptions": {
+    "module": "commonjs",
+    "target": "es5",
+    "sourceMap": true,
+    "noImplicitAny": true,
+    "removeComments": false,
+    "moduleResolution": "node"
+  },
+  "exclude": [
+    "source",
+    "node_modules",
+    "dist"
+  ]
+}