npm - nlptoolkit-postagger - Versions diffs - 1.0.0 - Mend

nlptoolkit-postagger 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/README.md +85 -0
package/brown.txt +89329 -0
package/dist/DummyPosTagger.d.ts +20 -0
package/dist/DummyPosTagger.js +47 -0
package/dist/DummyPosTagger.js.map +1 -0
package/dist/HmmPosTagger.d.ts +21 -0
package/dist/HmmPosTagger.js +56 -0
package/dist/HmmPosTagger.js.map +1 -0
package/dist/NaivePosTagger.d.ts +21 -0
package/dist/NaivePosTagger.js +61 -0
package/dist/NaivePosTagger.js.map +1 -0
package/dist/PosTaggedCorpus.d.ts +15 -0
package/dist/PosTaggedCorpus.js +75 -0
package/dist/PosTaggedCorpus.js.map +1 -0
package/dist/PosTaggedWord.d.ts +16 -0
package/dist/PosTaggedWord.js +35 -0
package/dist/PosTaggedWord.js.map +1 -0
package/dist/PosTagger.d.ts +6 -0
package/dist/PosTagger.js +13 -0
package/dist/PosTagger.js.map +1 -0
package/index.js +6 -0
package/package.json +31 -0
package/source/DummyPosTagger.ts +38 -0
package/source/HmmPosTagger.ts +51 -0
package/source/NaivePosTagger.ts +53 -0
package/source/PosTaggedCorpus.ts +62 -0
package/source/PosTaggedWord.ts +25 -0
package/source/PosTagger.ts +9 -0
package/source/tsconfig.json +13 -0
package/tests/DummyPosTaggerTest.ts +26 -0
package/tests/HmmPosTaggerTest.ts +26 -0
package/tests/NaivePosTaggerTest.ts +26 -0
package/tsconfig.json +15 -0

package/dist/DummyPosTagger.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import { PosTagger } from "./PosTagger";
+import { Sentence } from "nlptoolkit-corpus/dist/Sentence";
+import { PosTaggedCorpus } from "./PosTaggedCorpus";
+export declare class DummyPosTagger implements PosTagger {
+    private tagList; // Tags gathered by train(); starts out empty.
+    /**
+     * Baseline tagging method of the Dummy pos tagger. A new sentence is built in which every word of the
+     * input is paired with a tag picked at random from the tag list gathered by train().
+     * If train() has not been called yet the tag list is empty, so every word receives an undefined tag.
+     *
+     * @param sentence Sentence to be tagged.
+     * @return New sentence of tagged words, one per word of the input and in the same order.
+     */
+    posTag(sentence: Sentence): Sentence;
+    /**
+     * Train method for the Dummy pos tagger. Every tag reported by the corpus is appended to the tagger's
+     * tag list. The list is not cleared first, so calling train() again adds the tags once more.
+     *
+     * @param corpus Training data for the tagger.
+     */
+    train(corpus: PosTaggedCorpus): void;
+}

package/dist/DummyPosTagger.js ADDED Viewed

@@ -0,0 +1,47 @@
+(function (factory) {
+    if (typeof module === "object" && typeof module.exports === "object") {
+        var v = factory(require, exports);
+        if (v !== undefined) module.exports = v;
+    }
+    else if (typeof define === "function" && define.amd) {
+        define(["require", "exports", "nlptoolkit-corpus/dist/Sentence", "./PosTaggedWord"], factory);
+    }
+})(function (require, exports) {
+    "use strict";
+    Object.defineProperty(exports, "__esModule", { value: true });
+    exports.DummyPosTagger = void 0;
+    const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
+    const PosTaggedWord_1 = require("./PosTaggedWord");
+    class DummyPosTagger {
+        constructor() {
+            this.tagList = new Array();
+        }
+        /**
+         * Test method for the Dummy pos tagger. For each word, the method chooses randomly a tag from all possible
+         * tag list.
+         *
+         * @param sentence Sentence to be tagged.
+         * @return Annotated (tagged) sentence.
+         */
+        posTag(sentence) {
+            let result = new Sentence_1.Sentence();
+            for (let i = 0; i < sentence.wordCount(); i++) {
+                result.addWord(new PosTaggedWord_1.PosTaggedWord(sentence.getWord(i).getName(), this.tagList[Math.floor(Math.random() * this.tagList.length)]));
+            }
+            return result;
+        }
+        /**
+         * Train method for the Dummy pos tagger. The algorithm gets all possible tag list.
+         *
+         * @param corpus Training data for the tagger.
+         */
+        train(corpus) {
+            let corpusTagList = corpus.getTagList();
+            for (let tag of corpusTagList) {
+                this.tagList.push(tag);
+            }
+        }
+    }
+    exports.DummyPosTagger = DummyPosTagger;
+});
+//# sourceMappingURL=DummyPosTagger.js.map

package/dist/DummyPosTagger.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"DummyPosTagger.js","sourceRoot":"","sources":["../source/DummyPosTagger.ts"],"names":[],"mappings":";;;;;;;;;;;;IACA,8DAAyD;IAEzD,mDAA8C;IAE9C,MAAa,cAAc;QAA3B;YAEY,YAAO,GAAkB,IAAI,KAAK,EAAU,CAAA;QA8BxD,CAAC;QA5BG;;;;;;WAMG;QACH,MAAM,CAAC,QAAkB;YACrB,IAAI,MAAM,GAAG,IAAI,mBAAQ,EAAE,CAAC;YAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,EAAE,EAAC;gBAC1C,MAAM,CAAC,OAAO,CAAC,IAAI,6BAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,EAC1D,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;aACvE;YACD,OAAO,MAAM,CAAC;QAClB,CAAC;QAED;;;;WAIG;QACH,KAAK,CAAC,MAAuB;YACzB,IAAI,aAAa,GAAG,MAAM,CAAC,UAAU,EAAE,CAAC;YACxC,KAAK,IAAI,GAAG,IAAI,aAAa,EAAC;gBAC1B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;aACzB;QACL,CAAC;KAEJ;IAhCD,wCAgCC"}

package/dist/HmmPosTagger.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import { PosTagger } from "./PosTagger";
+import { Sentence } from "nlptoolkit-corpus/dist/Sentence";
+import { PosTaggedCorpus } from "./PosTaggedCorpus";
+export declare class HmmPosTagger implements PosTagger {
+    private hmm; // Model created by train(); not set before train() has run.
+    /**
+     * Tagging method of the Hmm pos tagger. The words of the sentence are passed to the viterbi method of
+     * the trained Hmm, and the i-th entry of the returned state sequence becomes the tag of the i-th word
+     * in a newly built sentence. train() must be called first, because the Hmm is only created there.
+     *
+     * @param sentence Sentence to be tagged.
+     * @return New sentence of tagged words, one per word of the input and in the same order.
+     */
+    posTag(sentence: Sentence): Sentence;
+    /**
+     * Train method for the Hmm pos tagger. An Hmm is created from the tags of the corpus, the tag sequences
+     * collected from its sentences, and the word lists of the corpus; it replaces any previously trained model.
+     *
+     * @param corpus Training data for the tagger.
+     */
+    train(corpus: PosTaggedCorpus): void;
+}

package/dist/HmmPosTagger.js ADDED Viewed

@@ -0,0 +1,56 @@
+(function (factory) {
+    if (typeof module === "object" && typeof module.exports === "object") {
+        var v = factory(require, exports);
+        if (v !== undefined) module.exports = v;
+    }
+    else if (typeof define === "function" && define.amd) {
+        define(["require", "exports", "nlptoolkit-corpus/dist/Sentence", "./PosTaggedWord", "nlptoolkit-hmm/dist/Hmm1"], factory);
+    }
+})(function (require, exports) {
+    "use strict";
+    Object.defineProperty(exports, "__esModule", { value: true });
+    exports.HmmPosTagger = void 0;
+    const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
+    const PosTaggedWord_1 = require("./PosTaggedWord");
+    const Hmm1_1 = require("nlptoolkit-hmm/dist/Hmm1");
+    class HmmPosTagger {
+        /**
+         * Test method for the Hmm pos tagger. For each sentence, the method uses the viterbi algorithm to produce the
+         * most possible state sequence for the given sentence.
+         *
+         * @param sentence Sentence to be tagged.
+         * @return Annotated (tagged) sentence.
+         */
+        posTag(sentence) {
+            let result = new Sentence_1.Sentence();
+            let tagList = this.hmm.viterbi(sentence.getWords());
+            for (let i = 0; i < sentence.wordCount(); i++) {
+                result.addWord(new PosTaggedWord_1.PosTaggedWord(sentence.getWord(i).getName(), tagList[i]));
+            }
+            return result;
+        }
+        /**
+         * Train method for the Hmm pos tagger. The algorithm trains an Hmm from the corpus, where corpus constitutes
+         * as an observation array.
+         *
+         * @param corpus Training data for the tagger.
+         */
+        train(corpus) {
+            let emittedSymbols = new Array();
+            for (let i = 0; i < emittedSymbols.length; i++) {
+                emittedSymbols.push(new Array());
+                for (let j = 0; j < corpus.getSentence(i).wordCount(); j++) {
+                    let word = corpus.getSentence(i).getWord(j);
+                    emittedSymbols[i].push(word.getTag());
+                }
+            }
+            let tagList = new Set();
+            for (let tag of corpus.getTagList()) {
+                tagList.add(tag);
+            }
+            this.hmm = new Hmm1_1.Hmm1(tagList, emittedSymbols, corpus.getAllWordsAsArrayList());
+        }
+    }
+    exports.HmmPosTagger = HmmPosTagger;
+});
+//# sourceMappingURL=HmmPosTagger.js.map

package/dist/HmmPosTagger.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"HmmPosTagger.js","sourceRoot":"","sources":["../source/HmmPosTagger.ts"],"names":[],"mappings":";;;;;;;;;;;;IACA,8DAAyD;IAIzD,mDAA8C;IAC9C,mDAA8C;IAE9C,MAAa,YAAY;QAIrB;;;;;;WAMG;QACH,MAAM,CAAC,QAAkB;YACrB,IAAI,MAAM,GAAG,IAAI,mBAAQ,EAAE,CAAC;YAC5B,IAAI,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC;YACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,EAAE,EAAC;gBAC1C,MAAM,CAAC,OAAO,CAAC,IAAI,6BAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAChF;YACD,OAAO,MAAM,CAAC;QAClB,CAAC;QAED;;;;;WAKG;QACH,KAAK,CAAC,MAAuB;YACzB,IAAI,cAAc,GAAG,IAAI,KAAK,EAAiB,CAAA;YAC/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAC;gBAC3C,cAAc,CAAC,IAAI,CAAC,IAAI,KAAK,EAAU,CAAC,CAAC;gBACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,EAAE,EAAC;oBACvD,IAAI,IAAI,GAAkB,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;oBAC3D,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;iBACzC;aACJ;YACD,IAAI,OAAO,GAAG,IAAI,GAAG,EAAU,CAAA;YAC/B,KAAK,IAAI,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE,EAAC;gBAChC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;aACnB;YACD,IAAI,CAAC,GAAG,GAAG,IAAI,WAAI,CAAe,OAAO,EAAE,cAAc,EAAE,MAAM,CAAC,sBAAsB,EAAE,CAAC,CAAC;QAChG,CAAC;KAEJ;IA1CD,oCA0CC"}

package/dist/NaivePosTagger.d.ts ADDED Viewed

@@ -0,0 +1,21 @@
+import { PosTagger } from "./PosTagger";
+import { Sentence } from "nlptoolkit-corpus/dist/Sentence";
+import { PosTaggedCorpus } from "./PosTaggedCorpus";
+export declare class NaivePosTagger implements PosTagger {
+    private maxMap; // Word name -> tag chosen for it by train(); not set before train() has run.
+    /**
+     * Tagging method of the Naive pos tagger. A new sentence is built in which every word of the input is
+     * paired with the tag stored for that word name during training. Words that did not occur in the
+     * training corpus have no stored tag and therefore receive an undefined tag.
+     *
+     * @param sentence Sentence to be tagged.
+     * @return New sentence of tagged words, one per word of the input and in the same order.
+     */
+    posTag(sentence: Sentence): Sentence;
+    /**
+     * Train method for the Naive pos tagger. For each distinct word name in the corpus the tags it occurs
+     * with are counted, and the maximum of those counts decides the single tag stored for the word.
+     * Each call rebuilds the word-to-tag table from scratch.
+     *
+     * @param corpus Training data for the tagger.
+     */
+    train(corpus: PosTaggedCorpus): void;
+}

package/dist/NaivePosTagger.js ADDED Viewed

@@ -0,0 +1,61 @@
+(function (factory) {
+    if (typeof module === "object" && typeof module.exports === "object") {
+        var v = factory(require, exports);
+        if (v !== undefined) module.exports = v;
+    }
+    else if (typeof define === "function" && define.amd) {
+        define(["require", "exports", "nlptoolkit-corpus/dist/Sentence", "./PosTaggedWord", "nlptoolkit-datastructure/dist/CounterHashMap"], factory);
+    }
+})(function (require, exports) {
+    "use strict";
+    Object.defineProperty(exports, "__esModule", { value: true });
+    exports.NaivePosTagger = void 0;
+    const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
+    const PosTaggedWord_1 = require("./PosTaggedWord");
+    const CounterHashMap_1 = require("nlptoolkit-datastructure/dist/CounterHashMap");
+    class NaivePosTagger {
+        /**
+         * Test method for the Naive pos tagger. For each word, the method chooses the maximum a posterior tag from all
+         * possible tag list for that word.
+         *
+         * @param sentence Sentence to be tagged.
+         * @return Annotated (tagged) sentence.
+         */
+        posTag(sentence) {
+            let result = new Sentence_1.Sentence();
+            for (let i = 0; i < sentence.wordCount(); i++) {
+                result.addWord(new PosTaggedWord_1.PosTaggedWord(sentence.getWord(i).getName(), this.maxMap.get(sentence.getWord(i).getName())));
+            }
+            return result;
+        }
+        /**
+         * Train method for the Naive pos tagger. The algorithm gets all possible tag list. Then counts all
+         * possible tags (with its counts) for each possible word.
+         *
+         * @param corpus Training data for the tagger.
+         */
+        train(corpus) {
+            let map = new Map();
+            for (let i = 0; i < corpus.sentenceCount(); i++) {
+                let s = corpus.getSentence(i);
+                for (let j = 0; j < s.wordCount(); j++) {
+                    let word = corpus.getSentence(i).getWord(j);
+                    if (map.has(word.getName())) {
+                        map.get(word.getName()).put(word.getTag());
+                    }
+                    else {
+                        let counterMap = new CounterHashMap_1.CounterHashMap();
+                        counterMap.put(word.getTag());
+                        map.set(word.getName(), counterMap);
+                    }
+                }
+            }
+            this.maxMap = new Map();
+            for (let word of map.keys()) {
+                this.maxMap.set(word, map.get(word).max());
+            }
+        }
+    }
+    exports.NaivePosTagger = NaivePosTagger;
+});
+//# sourceMappingURL=NaivePosTagger.js.map

package/dist/NaivePosTagger.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"NaivePosTagger.js","sourceRoot":"","sources":["../source/NaivePosTagger.ts"],"names":[],"mappings":";;;;;;;;;;;;IACA,8DAAyD;IAEzD,mDAA8C;IAC9C,iFAA4E;IAE5E,MAAa,cAAc;QAIvB;;;;;;WAMG;QACH,MAAM,CAAC,QAAkB;YACrB,IAAI,MAAM,GAAG,IAAI,mBAAQ,EAAE,CAAC;YAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,EAAE,EAAC;gBAC1C,MAAM,CAAC,OAAO,CAAC,IAAI,6BAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,EAAE,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;aACpH;YACD,OAAO,MAAM,CAAC;QAClB,CAAC;QAED;;;;;WAKG;QACH,KAAK,CAAC,MAAuB;YACzB,IAAI,GAAG,GAAG,IAAI,GAAG,EAAkC,CAAC;YACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,aAAa,EAAE,EAAE,CAAC,EAAE,EAAC;gBAC5C,IAAI,CAAC,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;gBAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,EAAE,EAAC;oBACnC,IAAI,IAAI,GAAmB,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;oBAC5D,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,EAAC;wBACxB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;qBAC9C;yBAAM;wBACH,IAAI,UAAU,GAAG,IAAI,+BAAc,EAAU,CAAC;wBAC9C,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;wBAC9B,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,UAAU,CAAC,CAAC;qBACvC;iBACJ;aACJ;YACD,IAAI,CAAC,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;YACxC,KAAK,IAAI,IAAI,IAAI,GAAG,CAAC,IAAI,EAAE,EAAC;gBACxB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;aAC9C;QACL,CAAC;KAEJ;IA9CD,wCA8CC"}

package/dist/PosTaggedCorpus.d.ts ADDED Viewed

@@ -0,0 +1,15 @@
+import { Corpus } from "nlptoolkit-corpus/dist/Corpus";
+export declare class PosTaggedCorpus extends Corpus {
+    private tagList; // Counter of the (shortened) tags recorded while loading the corpus.
+    /**
+     * A constructor of {@link PosTaggedCorpus}. Without a file name the corpus starts empty. Otherwise the
+     * file is read synchronously as UTF-8 and every whitespace-separated token containing a '/' becomes a
+     * tagged word: the text before the last '/' is the word and the text after it is the tag. The tag is
+     * cut at its first '+', or failing that at its first '-', before it is stored. A token whose full tag
+     * is '.' closes the current sentence, tokens without a '/' are ignored, and words left over at the
+     * end of the file form a final sentence.
+     *
+     * @param fileName Optional path of the tagged corpus file to load.
+     */
+    constructor(fileName?: string);
+    /**
+     * getTagList returns the tags recorded while loading the corpus (the keys of the tag counter).
+     *
+     * @return Iterator over the recorded tags. It is an iterator, not a Set; call getTagList() again to
+     * iterate a second time.
+     */
+    getTagList(): IterableIterator<string>;
+}

package/dist/PosTaggedCorpus.js ADDED Viewed

@@ -0,0 +1,75 @@
+(function (factory) {
+    if (typeof module === "object" && typeof module.exports === "object") {
+        var v = factory(require, exports);
+        if (v !== undefined) module.exports = v;
+    }
+    else if (typeof define === "function" && define.amd) {
+        define(["require", "exports", "nlptoolkit-corpus/dist/Corpus", "nlptoolkit-datastructure/dist/CounterHashMap", "nlptoolkit-corpus/dist/Sentence", "fs", "./PosTaggedWord"], factory);
+    }
+})(function (require, exports) {
+    "use strict";
+    Object.defineProperty(exports, "__esModule", { value: true });
+    exports.PosTaggedCorpus = void 0;
+    const Corpus_1 = require("nlptoolkit-corpus/dist/Corpus");
+    const CounterHashMap_1 = require("nlptoolkit-datastructure/dist/CounterHashMap");
+    const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
+    const fs = require("fs");
+    const PosTaggedWord_1 = require("./PosTaggedWord");
+    class PosTaggedCorpus extends Corpus_1.Corpus {
+        /**
+         * A constructor of {@link PosTaggedCorpus} which initializes the sentences of the corpus, the word list of
+         * the corpus, and all possible tags.
+         */
+        constructor(fileName) {
+            super();
+            this.tagList = new CounterHashMap_1.CounterHashMap();
+            if (fileName != undefined) {
+                let newSentence = new Sentence_1.Sentence();
+                let data = fs.readFileSync(fileName, 'utf8');
+                let lines = data.split("\n");
+                for (let line of lines) {
+                    let words = line.split(/\s/);
+                    for (let word of words) {
+                        if (word != "") {
+                            if (word.includes("/")) {
+                                let name = word.substring(0, word.lastIndexOf('/'));
+                                let tag = word.substring(word.lastIndexOf('/') + 1);
+                                let shortTag;
+                                if (tag.includes("+")) {
+                                    shortTag = tag.substring(0, tag.indexOf("+"));
+                                }
+                                else {
+                                    if (tag.includes("-")) {
+                                        shortTag = tag.substring(0, tag.indexOf("-"));
+                                    }
+                                    else {
+                                        shortTag = tag;
+                                    }
+                                }
+                                this.tagList.put(shortTag);
+                                newSentence.addWord(new PosTaggedWord_1.PosTaggedWord(name, shortTag));
+                                if (tag == ".") {
+                                    this.addSentence(newSentence);
+                                    newSentence = new Sentence_1.Sentence();
+                                }
+                            }
+                        }
+                    }
+                }
+                if (newSentence.wordCount() > 0) {
+                    this.addSentence(newSentence);
+                }
+            }
+        }
+        /**
+         * getTagList returns all possible tags as a set.
+         *
+         * @return Set of all possible tags.
+         */
+        getTagList() {
+            return this.tagList.keys();
+        }
+    }
+    exports.PosTaggedCorpus = PosTaggedCorpus;
+});
+//# sourceMappingURL=PosTaggedCorpus.js.map

package/dist/PosTaggedCorpus.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"PosTaggedCorpus.js","sourceRoot":"","sources":["../source/PosTaggedCorpus.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,0DAAqD;IACrD,iFAA4E;IAC5E,8DAAyD;IACzD,yBAAyB;IACzB,mDAA8C;IAE9C,MAAa,eAAgB,SAAQ,eAAM;QAIvC;;;WAGG;QACH,YAAY,QAAiB;YACzB,KAAK,EAAE,CAAC;YAPJ,YAAO,GAA2B,IAAI,+BAAc,EAAU,CAAA;YAQlE,IAAI,QAAQ,IAAI,SAAS,EAAE;gBACvB,IAAI,WAAW,GAAG,IAAI,mBAAQ,EAAE,CAAC;gBACjC,IAAI,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;gBAC5C,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;gBAC5B,KAAK,IAAI,IAAI,IAAI,KAAK,EAAE;oBACpB,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBAC7B,KAAK,IAAI,IAAI,IAAI,KAAK,EAAC;wBACnB,IAAI,IAAI,IAAI,EAAE,EAAC;4BACX,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAC;gCACnB,IAAI,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;gCACpD,IAAI,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;gCACpD,IAAI,QAAQ,CAAA;gCACZ,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAC;oCAClB,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;iCACjD;qCAAM;oCACH,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAC;wCAClB,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;qCACjD;yCAAM;wCACH,QAAQ,GAAG,GAAG,CAAC;qCAClB;iCACJ;gCACD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gCAC3B,WAAW,CAAC,OAAO,CAAC,IAAI,6BAAa,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC;gCACvD,IAAI,GAAG,IAAI,GAAG,EAAC;oCACX,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;oCAC9B,WAAW,GAAG,IAAI,mBAAQ,EAAE,CAAC;iCAChC;6BACJ;yBACJ;qBACJ;iBACJ;gBACD,IAAI,WAAW,CAAC,SAAS,EAAE,GAAG,CAAC,EAAC;oBAC5B,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;iBACjC;aACJ;QACL,CAAC;QAED;;;;WAIG;QACH,UAAU;YACN,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;QAC9B,CAAC;KACJ;IAvDD,0CAuDC"}

package/dist/PosTaggedWord.d.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import { Word } from "nlptoolkit-dictionary/dist/Dictionary/Word";
+export declare class PosTaggedWord extends Word {
+    private tag; // Part-of-speech tag attached to this word.
+    /**
+     * A constructor of {@link PosTaggedWord} which passes the name on to the {@link Word} constructor and
+     * stores the tag as given.
+     * @param name Name of the word
+     * @param tag Tag of the word
+     */
+    constructor(name: string, tag: string);
+    /**
+     * Accessor method for tag attribute.
+     *
+     * @return Tag that was passed to the constructor.
+     */
+    getTag(): string;
+}

package/dist/PosTaggedWord.js ADDED Viewed

@@ -0,0 +1,35 @@
+(function (factory) {
+    if (typeof module === "object" && typeof module.exports === "object") {
+        var v = factory(require, exports);
+        if (v !== undefined) module.exports = v;
+    }
+    else if (typeof define === "function" && define.amd) {
+        define(["require", "exports", "nlptoolkit-dictionary/dist/Dictionary/Word"], factory);
+    }
+})(function (require, exports) {
+    "use strict";
+    Object.defineProperty(exports, "__esModule", { value: true });
+    exports.PosTaggedWord = void 0;
+    const Word_1 = require("nlptoolkit-dictionary/dist/Dictionary/Word");
+    class PosTaggedWord extends Word_1.Word {
+        /**
+         * A constructor of {@link PosTaggedWord} which passes the name on to the Word constructor and stores
+         * the tag as given.
+         * @param name Name of the word
+         * @param tag Tag of the word
+         */
+        constructor(name, tag) {
+            super(name);
+            this.tag = tag;
+        }
+        /**
+         * Accessor method for tag attribute.
+         *
+         * @return Tag that was passed to the constructor.
+         */
+        getTag() {
+            return this.tag;
+        }
+    }
+    exports.PosTaggedWord = PosTaggedWord;
+});
+//# sourceMappingURL=PosTaggedWord.js.map

package/dist/PosTaggedWord.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"PosTaggedWord.js","sourceRoot":"","sources":["../source/PosTaggedWord.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,qEAAgE;IAEhE,MAAa,aAAc,SAAQ,WAAI;QAInC;;;;WAIG;QACH,YAAY,IAAY,EAAE,GAAW;YACjC,KAAK,CAAC,IAAI,CAAC,CAAC;YACZ,IAAI,CAAC,GAAG,GAAG,GAAG,CAAA;QAClB,CAAC;QAED;;;;WAIG;QACH,MAAM;YACF,OAAO,IAAI,CAAC,GAAG,CAAA;QACnB,CAAC;KACJ;IAtBD,sCAsBC"}

package/dist/PosTagger.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+import { Sentence } from "nlptoolkit-corpus/dist/Sentence";
+import { PosTaggedCorpus } from "./PosTaggedCorpus";
+export interface PosTagger { // Contract shared by the taggers of this package.
+    train(corpus: PosTaggedCorpus): void; // Prepares the tagger from a tagged corpus; call before posTag.
+    posTag(sentence: Sentence): Sentence; // Returns a sentence of tagged words for the given sentence.
+}

package/dist/PosTagger.js ADDED Viewed

@@ -0,0 +1,13 @@
+(function (factory) {
+    if (typeof module === "object" && typeof module.exports === "object") {
+        var v = factory(require, exports);
+        if (v !== undefined) module.exports = v;
+    }
+    else if (typeof define === "function" && define.amd) {
+        define(["require", "exports"], factory);
+    }
+})(function (require, exports) {
+    "use strict";
+    Object.defineProperty(exports, "__esModule", { value: true });
+});
+//# sourceMappingURL=PosTagger.js.map

package/dist/PosTagger.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"PosTagger.js","sourceRoot":"","sources":["../source/PosTagger.ts"],"names":[],"mappings":""}

package/index.js ADDED Viewed

@@ -0,0 +1,6 @@
+export * from "./dist/DummyPosTagger"
+export * from "./dist/HmmPosTagger"
+export * from "./dist/NaivePosTagger"
+export * from "./dist/PosTaggedCorpus"
+export * from "./dist/PosTaggedWord"
+export * from "./dist/PosTagger"

package/package.json ADDED Viewed

@@ -0,0 +1,31 @@
+{
+  "name": "nlptoolkit-postagger",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "types": "index.js",
+  "scripts": {
+    "test": "Mocha"
+  },
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/StarlangSoftware/EnglishPosTagger-Js.git"
+  },
+  "author": "Olcay Taner Yıldız",
+  "license": "ISC",
+  "bugs": {
+    "url": "https://github.com/StarlangSoftware/EnglishPosTagger-Js/issues"
+  },
+  "homepage": "https://github.com/StarlangSoftware/EnglishPosTagger-Js#readme",
+  "devDependencies": {
+    "@types/mocha": "^9.0.0",
+    "mocha": "^9.1.3",
+    "nlptoolkit-corpus": "^1.0.0",
+    "nlptoolkit-datastructure": "^1.0.0",
+    "nlptoolkit-dictionary": "^1.0.1",
+    "nlptoolkit-hmm": "^1.0.0",
+    "nlptoolkit-math": "^1.0.0",
+    "ts-node": "^10.4.0",
+    "typescript": "^4.5.2"
+  }
+}

package/source/DummyPosTagger.ts ADDED Viewed

@@ -0,0 +1,38 @@
+import {PosTagger} from "./PosTagger";
+import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
+import {PosTaggedCorpus} from "./PosTaggedCorpus";
+import {PosTaggedWord} from "./PosTaggedWord";
+export class DummyPosTagger implements PosTagger{
+    // Tags gathered by train(); empty until then.
+    private tagList: Array<string> = new Array<string>()
+    /**
+     * Tags a sentence at random: every word of the input is copied into a new sentence together with a
+     * tag drawn from the tag list collected during training.
+     *
+     * @param sentence Sentence to be tagged.
+     * @return Annotated (tagged) sentence.
+     */
+    posTag(sentence: Sentence): Sentence {
+        const tagged = new Sentence();
+        let position = 0;
+        while (position < sentence.wordCount()){
+            const name = sentence.getWord(position).getName();
+            // Random index into the tag list.
+            const pick = Math.floor(Math.random() * this.tagList.length);
+            tagged.addWord(new PosTaggedWord(name, this.tagList[pick]));
+            position++;
+        }
+        return tagged;
+    }
+    /**
+     * Collects the tags of the corpus by appending each of them to the tagger's tag list.
+     *
+     * @param corpus Training data for the tagger.
+     */
+    train(corpus: PosTaggedCorpus): void {
+        for (const tag of corpus.getTagList()){
+            this.tagList.push(tag);
+        }
+    }
+}

package/source/HmmPosTagger.ts ADDED Viewed

@@ -0,0 +1,51 @@
+import {PosTagger} from "./PosTagger";
+import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
+import {PosTaggedCorpus} from "./PosTaggedCorpus";
+import {Hmm} from "nlptoolkit-hmm/dist/Hmm";
+import {Word} from "nlptoolkit-dictionary/dist/Dictionary/Word";
+import {PosTaggedWord} from "./PosTaggedWord";
+import {Hmm1} from "nlptoolkit-hmm/dist/Hmm1";
+export class HmmPosTagger implements PosTagger{
+    // Model created by train(); not set before train() has run.
+    private hmm: Hmm<string, Word>
+    /**
+     * Tagging method of the Hmm pos tagger. The words of the sentence are passed to the viterbi method of
+     * the trained Hmm, and the i-th entry of the returned state sequence becomes the tag of the i-th word.
+     * train() must be called first, because the Hmm is only created there.
+     *
+     * @param sentence Sentence to be tagged.
+     * @return Annotated (tagged) sentence.
+     */
+    posTag(sentence: Sentence): Sentence {
+        let result = new Sentence();
+        let tagList = this.hmm.viterbi(sentence.getWords());
+        for (let i = 0; i < sentence.wordCount(); i++){
+            result.addWord(new PosTaggedWord(sentence.getWord(i).getName(), tagList[i]));
+        }
+        return result;
+    }
+    /**
+     * Train method for the Hmm pos tagger. Collects one tag sequence per corpus sentence and builds an Hmm
+     * from the tag set, those tag sequences, and the word lists of the corpus.
+     *
+     * @param corpus Training data for the tagger.
+     */
+    train(corpus: PosTaggedCorpus): void {
+        let emittedSymbols = new Array<Array<string>>()
+        // The loop must be bounded by the number of corpus sentences. Bounding it by the length of the
+        // still-empty emittedSymbols array would skip every sentence and train the Hmm on no sequences.
+        for (let i = 0; i < corpus.sentenceCount(); i++){
+            let sentence = corpus.getSentence(i);
+            let tags = new Array<string>();
+            for (let j = 0; j < sentence.wordCount(); j++){
+                let word = <PosTaggedWord>sentence.getWord(j);
+                tags.push(word.getTag());
+            }
+            emittedSymbols.push(tags);
+        }
+        // The Hmm is given the tags as a Set, so the corpus tag iterator is copied into one.
+        let tagList = new Set<string>()
+        for (let tag of corpus.getTagList()){
+            tagList.add(tag)
+        }
+        this.hmm = new Hmm1<string, Word>(tagList, emittedSymbols, corpus.getAllWordsAsArrayList());
+    }
+}

package/source/NaivePosTagger.ts ADDED Viewed

@@ -0,0 +1,53 @@
+import {PosTagger} from "./PosTagger";
+import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
+import {PosTaggedCorpus} from "./PosTaggedCorpus";
+import {PosTaggedWord} from "./PosTaggedWord";
+import {CounterHashMap} from "nlptoolkit-datastructure/dist/CounterHashMap";
+export class NaivePosTagger implements PosTagger{
+    // Word name -> tag chosen for it by train().
+    private maxMap: Map<string, string>
+    /**
+     * Tags a sentence by looking up, for every word, the tag that train() stored for that word name.
+     *
+     * @param sentence Sentence to be tagged.
+     * @return Annotated (tagged) sentence.
+     */
+    posTag(sentence: Sentence): Sentence {
+        const tagged = new Sentence();
+        let position = 0;
+        while (position < sentence.wordCount()){
+            const name = sentence.getWord(position).getName();
+            tagged.addWord(new PosTaggedWord(name, this.maxMap.get(name)));
+            position++;
+        }
+        return tagged;
+    }
+    /**
+     * Counts, per word name, the tags the word occurs with in the corpus, then keeps for each word the
+     * tag selected by the counter's max().
+     *
+     * @param corpus Training data for the tagger.
+     */
+    train(corpus: PosTaggedCorpus): void {
+        // Word name -> counter of the tags seen with that word.
+        const tagCounts = new Map<string, CounterHashMap<string>>();
+        for (let s = 0; s < corpus.sentenceCount(); s++){
+            const sentence = corpus.getSentence(s);
+            for (let w = 0; w < sentence.wordCount(); w++){
+                const word = <PosTaggedWord> sentence.getWord(w);
+                const name = word.getName();
+                let counter = tagCounts.get(name);
+                if (counter === undefined){
+                    // First occurrence of this word: start a fresh counter for it.
+                    counter = new CounterHashMap<string>();
+                    tagCounts.set(name, counter);
+                }
+                counter.put(word.getTag());
+            }
+        }
+        this.maxMap = new Map<string, string>();
+        for (const [name, counter] of tagCounts){
+            this.maxMap.set(name, counter.max());
+        }
+    }
+}