nlptoolkit-postagger 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ import { PosTagger } from "./PosTagger";
2
+ import { Sentence } from "nlptoolkit-corpus/dist/Sentence";
3
+ import { PosTaggedCorpus } from "./PosTaggedCorpus";
4
+ export declare class DummyPosTagger implements PosTagger {
5
+ private tagList; // tags collected by train(); posTag() picks from this list
6
+ /**
7
+ * Baseline tagger intended for testing. Builds a new sentence in which every word of the input is paired with
8
+ * the tag found at a random index of the stored tag list (the tag is undefined if train() was never called).
9
+ *
10
+ * @param sentence Sentence to be tagged.
11
+ * @returns A new sentence holding one tagged word per input word.
12
+ */
13
+ posTag(sentence: Sentence): Sentence;
14
+ /**
15
+ * Appends every tag reported by corpus.getTagList() to the stored tag list. The list is not cleared first.
16
+ *
17
+ * @param corpus Training data for the tagger.
18
+ */
19
+ train(corpus: PosTaggedCorpus): void;
20
+ }
@@ -0,0 +1,47 @@
1
+ (function (factory) {
2
+ if (typeof module === "object" && typeof module.exports === "object") {
3
+ var v = factory(require, exports);
4
+ if (v !== undefined) module.exports = v;
5
+ }
6
+ else if (typeof define === "function" && define.amd) {
7
+ define(["require", "exports", "nlptoolkit-corpus/dist/Sentence", "./PosTaggedWord"], factory);
8
+ }
9
+ })(function (require, exports) {
10
+ "use strict";
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.DummyPosTagger = void 0;
13
+ const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
14
+ const PosTaggedWord_1 = require("./PosTaggedWord");
15
class DummyPosTagger {
    constructor() {
        // Tags gathered by train(); posTag() samples from this list.
        this.tagList = [];
    }

    /**
     * Baseline tagger for testing: pairs every word of the input with the tag
     * stored at a randomly chosen index of the tag list.
     *
     * @param sentence Sentence to be tagged.
     * @returns A new sentence holding one tagged word per input word.
     */
    posTag(sentence) {
        const tagged = new Sentence_1.Sentence();
        let position = 0;
        while (position < sentence.wordCount()) {
            const name = sentence.getWord(position).getName();
            const randomIndex = Math.floor(Math.random() * this.tagList.length);
            tagged.addWord(new PosTaggedWord_1.PosTaggedWord(name, this.tagList[randomIndex]));
            position++;
        }
        return tagged;
    }

    /**
     * Appends every tag reported by the corpus to the stored tag list.
     *
     * @param corpus Training data for the tagger.
     */
    train(corpus) {
        this.tagList.push(...corpus.getTagList());
    }
}
45
+ exports.DummyPosTagger = DummyPosTagger;
46
+ });
47
+ //# sourceMappingURL=DummyPosTagger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DummyPosTagger.js","sourceRoot":"","sources":["../source/DummyPosTagger.ts"],"names":[],"mappings":";;;;;;;;;;;;IACA,8DAAyD;IAEzD,mDAA8C;IAE9C,MAAa,cAAc;QAA3B;YAEY,YAAO,GAAkB,IAAI,KAAK,EAAU,CAAA;QA8BxD,CAAC;QA5BG;;;;;;WAMG;QACH,MAAM,CAAC,QAAkB;YACrB,IAAI,MAAM,GAAG,IAAI,mBAAQ,EAAE,CAAC;YAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,EAAE,EAAC;gBAC1C,MAAM,CAAC,OAAO,CAAC,IAAI,6BAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,EAC1D,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;aACvE;YACD,OAAO,MAAM,CAAC;QAClB,CAAC;QAED;;;;WAIG;QACH,KAAK,CAAC,MAAuB;YACzB,IAAI,aAAa,GAAG,MAAM,CAAC,UAAU,EAAE,CAAC;YACxC,KAAK,IAAI,GAAG,IAAI,aAAa,EAAC;gBAC1B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;aACzB;QACL,CAAC;KAEJ;IAhCD,wCAgCC"}
@@ -0,0 +1,21 @@
1
+ import { PosTagger } from "./PosTagger";
2
+ import { Sentence } from "nlptoolkit-corpus/dist/Sentence";
3
+ import { PosTaggedCorpus } from "./PosTaggedCorpus";
4
+ export declare class HmmPosTagger implements PosTagger {
5
+ private hmm; // model created by train(); not set before the first train() call
6
+ /**
7
+ * Runs the viterbi() method of the trained model over the words of the sentence and pairs each word with the
8
+ * tag found at the same index of the returned sequence. train() must have been called first.
9
+ *
10
+ * @param sentence Sentence to be tagged.
11
+ * @returns A new sentence holding one tagged word per input word.
12
+ */
13
+ posTag(sentence: Sentence): Sentence;
14
+ /**
15
+ * Constructs a new Hmm1 model from the corpus and stores it for posTag(). A model stored by an earlier
16
+ * call is replaced.
17
+ *
18
+ * @param corpus Training data for the tagger.
19
+ */
20
+ train(corpus: PosTaggedCorpus): void;
21
+ }
@@ -0,0 +1,56 @@
1
+ (function (factory) {
2
+ if (typeof module === "object" && typeof module.exports === "object") {
3
+ var v = factory(require, exports);
4
+ if (v !== undefined) module.exports = v;
5
+ }
6
+ else if (typeof define === "function" && define.amd) {
7
+ define(["require", "exports", "nlptoolkit-corpus/dist/Sentence", "./PosTaggedWord", "nlptoolkit-hmm/dist/Hmm1"], factory);
8
+ }
9
+ })(function (require, exports) {
10
+ "use strict";
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.HmmPosTagger = void 0;
13
+ const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
14
+ const PosTaggedWord_1 = require("./PosTaggedWord");
15
+ const Hmm1_1 = require("nlptoolkit-hmm/dist/Hmm1");
16
class HmmPosTagger {
    /**
     * Tags a sentence with the trained model: runs viterbi() over the words of
     * the sentence and pairs each word with the tag at the same index of the
     * returned sequence.
     *
     * @param sentence Sentence to be tagged.
     * @returns A new sentence holding one tagged word per input word.
     * @throws Error if train() has not been called yet.
     */
    posTag(sentence) {
        if (this.hmm === undefined) {
            // Fail with a clear message instead of a TypeError on `undefined.viterbi`.
            throw new Error("HmmPosTagger.posTag() called before train()");
        }
        const tags = this.hmm.viterbi(sentence.getWords());
        const result = new Sentence_1.Sentence();
        for (let i = 0; i < sentence.wordCount(); i++) {
            result.addWord(new PosTaggedWord_1.PosTaggedWord(sentence.getWord(i).getName(), tags[i]));
        }
        return result;
    }

    /**
     * Builds the model from the corpus. For every sentence the tags of its
     * words are collected into one array; the tag set, these per-sentence tag
     * arrays and the corpus words are then handed to Hmm1.
     *
     * @param corpus Training data for the tagger.
     */
    train(corpus) {
        const emittedSymbols = [];
        // The bound must be the number of sentences in the corpus. Bounding the
        // loop by emittedSymbols.length (0 for a new array) would skip every
        // sentence and train the model on no tag sequences at all.
        for (let i = 0; i < corpus.sentenceCount(); i++) {
            const sentence = corpus.getSentence(i);
            const sentenceTags = [];
            for (let j = 0; j < sentence.wordCount(); j++) {
                sentenceTags.push(sentence.getWord(j).getTag());
            }
            emittedSymbols.push(sentenceTags);
        }
        const tagList = new Set(corpus.getTagList());
        this.hmm = new Hmm1_1.Hmm1(tagList, emittedSymbols, corpus.getAllWordsAsArrayList());
    }
}
54
+ exports.HmmPosTagger = HmmPosTagger;
55
+ });
56
+ //# sourceMappingURL=HmmPosTagger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"HmmPosTagger.js","sourceRoot":"","sources":["../source/HmmPosTagger.ts"],"names":[],"mappings":";;;;;;;;;;;;IACA,8DAAyD;IAIzD,mDAA8C;IAC9C,mDAA8C;IAE9C,MAAa,YAAY;QAIrB;;;;;;WAMG;QACH,MAAM,CAAC,QAAkB;YACrB,IAAI,MAAM,GAAG,IAAI,mBAAQ,EAAE,CAAC;YAC5B,IAAI,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC,CAAC;YACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,EAAE,EAAC;gBAC1C,MAAM,CAAC,OAAO,CAAC,IAAI,6BAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAChF;YACD,OAAO,MAAM,CAAC;QAClB,CAAC;QAED;;;;;WAKG;QACH,KAAK,CAAC,MAAuB;YACzB,IAAI,cAAc,GAAG,IAAI,KAAK,EAAiB,CAAA;YAC/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,cAAc,CAAC,MAAM,EAAE,CAAC,EAAE,EAAC;gBAC3C,cAAc,CAAC,IAAI,CAAC,IAAI,KAAK,EAAU,CAAC,CAAC;gBACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,EAAE,EAAC;oBACvD,IAAI,IAAI,GAAkB,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;oBAC3D,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;iBACzC;aACJ;YACD,IAAI,OAAO,GAAG,IAAI,GAAG,EAAU,CAAA;YAC/B,KAAK,IAAI,GAAG,IAAI,MAAM,CAAC,UAAU,EAAE,EAAC;gBAChC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;aACnB;YACD,IAAI,CAAC,GAAG,GAAG,IAAI,WAAI,CAAe,OAAO,EAAE,cAAc,EAAE,MAAM,CAAC,sBAAsB,EAAE,CAAC,CAAC;QAChG,CAAC;KAEJ;IA1CD,oCA0CC"}
@@ -0,0 +1,21 @@
1
+ import { PosTagger } from "./PosTagger";
2
+ import { Sentence } from "nlptoolkit-corpus/dist/Sentence";
3
+ import { PosTaggedCorpus } from "./PosTaggedCorpus";
4
+ export declare class NaivePosTagger implements PosTagger {
5
+ private maxMap; // word name -> tag chosen for that word by train()
6
+ /**
7
+ * Looks up the name of each word in the table built by train() and pairs the word with the tag stored there.
8
+ * Words that did not occur in the training corpus receive an undefined tag. train() must be called first.
9
+ *
10
+ * @param sentence Sentence to be tagged.
11
+ * @returns A new sentence holding one tagged word per input word.
12
+ */
13
+ posTag(sentence: Sentence): Sentence;
14
+ /**
15
+ * Counts, for every distinct word name in the corpus, how often each tag occurs with it, then stores for
16
+ * each word the tag returned by max() of its counter. The table of an earlier call is replaced.
17
+ *
18
+ * @param corpus Training data for the tagger.
19
+ */
20
+ train(corpus: PosTaggedCorpus): void;
21
+ }
@@ -0,0 +1,61 @@
1
+ (function (factory) {
2
+ if (typeof module === "object" && typeof module.exports === "object") {
3
+ var v = factory(require, exports);
4
+ if (v !== undefined) module.exports = v;
5
+ }
6
+ else if (typeof define === "function" && define.amd) {
7
+ define(["require", "exports", "nlptoolkit-corpus/dist/Sentence", "./PosTaggedWord", "nlptoolkit-datastructure/dist/CounterHashMap"], factory);
8
+ }
9
+ })(function (require, exports) {
10
+ "use strict";
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.NaivePosTagger = void 0;
13
+ const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
14
+ const PosTaggedWord_1 = require("./PosTaggedWord");
15
+ const CounterHashMap_1 = require("nlptoolkit-datastructure/dist/CounterHashMap");
16
class NaivePosTagger {
    /**
     * Tags each word with the tag stored for its name by train(). A word that
     * was not seen during training gets an undefined tag.
     *
     * @param sentence Sentence to be tagged.
     * @returns A new sentence holding one tagged word per input word.
     */
    posTag(sentence) {
        const tagged = new Sentence_1.Sentence();
        for (let position = 0; position < sentence.wordCount(); position++) {
            const name = sentence.getWord(position).getName();
            tagged.addWord(new PosTaggedWord_1.PosTaggedWord(name, this.maxMap.get(name)));
        }
        return tagged;
    }

    /**
     * Counts how often each tag occurs with each word name in the corpus and
     * keeps, per word, the tag returned by max() of its counter.
     *
     * @param corpus Training data for the tagger.
     */
    train(corpus) {
        // word name -> counter of the tags observed with that word
        const tagCounts = new Map();
        for (let i = 0; i < corpus.sentenceCount(); i++) {
            const current = corpus.getSentence(i);
            for (let j = 0; j < current.wordCount(); j++) {
                const word = current.getWord(j);
                const name = word.getName();
                let counter = tagCounts.get(name);
                if (counter === undefined) {
                    counter = new CounterHashMap_1.CounterHashMap();
                    tagCounts.set(name, counter);
                }
                counter.put(word.getTag());
            }
        }
        this.maxMap = new Map();
        tagCounts.forEach((counter, name) => {
            this.maxMap.set(name, counter.max());
        });
    }
}
59
+ exports.NaivePosTagger = NaivePosTagger;
60
+ });
61
+ //# sourceMappingURL=NaivePosTagger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"NaivePosTagger.js","sourceRoot":"","sources":["../source/NaivePosTagger.ts"],"names":[],"mappings":";;;;;;;;;;;;IACA,8DAAyD;IAEzD,mDAA8C;IAC9C,iFAA4E;IAE5E,MAAa,cAAc;QAIvB;;;;;;WAMG;QACH,MAAM,CAAC,QAAkB;YACrB,IAAI,MAAM,GAAG,IAAI,mBAAQ,EAAE,CAAC;YAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,SAAS,EAAE,EAAE,CAAC,EAAE,EAAC;gBAC1C,MAAM,CAAC,OAAO,CAAC,IAAI,6BAAa,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,EAAE,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC;aACpH;YACD,OAAO,MAAM,CAAC;QAClB,CAAC;QAED;;;;;WAKG;QACH,KAAK,CAAC,MAAuB;YACzB,IAAI,GAAG,GAAG,IAAI,GAAG,EAAkC,CAAC;YACpD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,aAAa,EAAE,EAAE,CAAC,EAAE,EAAC;gBAC5C,IAAI,CAAC,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;gBAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,EAAE,EAAC;oBACnC,IAAI,IAAI,GAAmB,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;oBAC5D,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,EAAC;wBACxB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;qBAC9C;yBAAM;wBACH,IAAI,UAAU,GAAG,IAAI,+BAAc,EAAU,CAAC;wBAC9C,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;wBAC9B,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,OAAO,EAAE,EAAE,UAAU,CAAC,CAAC;qBACvC;iBACJ;aACJ;YACD,IAAI,CAAC,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;YACxC,KAAK,IAAI,IAAI,IAAI,GAAG,CAAC,IAAI,EAAE,EAAC;gBACxB,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;aAC9C;QACL,CAAC;KAEJ;IA9CD,wCA8CC"}
@@ -0,0 +1,15 @@
1
+ import { Corpus } from "nlptoolkit-corpus/dist/Corpus";
2
+ export declare class PosTaggedCorpus extends Corpus {
3
+ private tagList; // counter keyed by the (shortened) tags seen while loading
4
+ /**
5
+ * Creates the corpus. When fileName is given, the file is read synchronously as UTF-8 and its whitespace-separated
6
+ * word/TAG tokens are added as tagged words; a token whose tag is "." closes the current sentence.
7
+ */
8
+ constructor(fileName?: string);
9
+ /**
10
+ * Returns the tags recorded while loading the corpus as an iterator over the keys of the tag counter.
11
+ *
12
+ * @returns Iterator over the recorded tags (not a Set; it can be consumed only once).
13
+ */
14
+ getTagList(): IterableIterator<string>;
15
+ }
@@ -0,0 +1,75 @@
1
+ (function (factory) {
2
+ if (typeof module === "object" && typeof module.exports === "object") {
3
+ var v = factory(require, exports);
4
+ if (v !== undefined) module.exports = v;
5
+ }
6
+ else if (typeof define === "function" && define.amd) {
7
+ define(["require", "exports", "nlptoolkit-corpus/dist/Corpus", "nlptoolkit-datastructure/dist/CounterHashMap", "nlptoolkit-corpus/dist/Sentence", "fs", "./PosTaggedWord"], factory);
8
+ }
9
+ })(function (require, exports) {
10
+ "use strict";
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.PosTaggedCorpus = void 0;
13
+ const Corpus_1 = require("nlptoolkit-corpus/dist/Corpus");
14
+ const CounterHashMap_1 = require("nlptoolkit-datastructure/dist/CounterHashMap");
15
+ const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
16
+ const fs = require("fs");
17
+ const PosTaggedWord_1 = require("./PosTaggedWord");
18
class PosTaggedCorpus extends Corpus_1.Corpus {
    /**
     * Creates the corpus and, when a file name is given, loads it.
     *
     * The file is read synchronously as UTF-8. Each whitespace-separated token
     * containing "/" is split at its last "/" into a word name and a tag. The
     * tag is shortened to the part before its first "+", or, when it has no
     * "+", to the part before its first "-". A tag that starts with one of
     * those characters therefore shortens to the empty string. A token whose
     * full tag is "." closes the current sentence.
     *
     * @param fileName Optional path of the tagged corpus file.
     */
    constructor(fileName) {
        super();
        this.tagList = new CounterHashMap_1.CounterHashMap();
        if (fileName == undefined) {
            return;
        }
        let current = new Sentence_1.Sentence();
        const contents = fs.readFileSync(fileName, 'utf8');
        for (const line of contents.split("\n")) {
            for (const token of line.split(/\s/)) {
                // Skip empty fragments and tokens that carry no tag.
                if (token == "" || !token.includes("/")) {
                    continue;
                }
                const slash = token.lastIndexOf('/');
                const name = token.substring(0, slash);
                const tag = token.substring(slash + 1);
                const plus = tag.indexOf("+");
                const dash = tag.indexOf("-");
                let shortTag = tag;
                if (plus !== -1) {
                    shortTag = tag.substring(0, plus);
                }
                else if (dash !== -1) {
                    shortTag = tag.substring(0, dash);
                }
                this.tagList.put(shortTag);
                current.addWord(new PosTaggedWord_1.PosTaggedWord(name, shortTag));
                // The sentence boundary is decided on the full tag, not the short one.
                if (tag == ".") {
                    this.addSentence(current);
                    current = new Sentence_1.Sentence();
                }
            }
        }
        // Keep a trailing sentence that was not closed by a "." tag.
        if (current.wordCount() > 0) {
            this.addSentence(current);
        }
    }

    /**
     * Returns the tags recorded while loading the corpus.
     *
     * @returns Iterator over the keys of the tag counter.
     */
    getTagList() {
        return this.tagList.keys();
    }
}
73
+ exports.PosTaggedCorpus = PosTaggedCorpus;
74
+ });
75
+ //# sourceMappingURL=PosTaggedCorpus.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PosTaggedCorpus.js","sourceRoot":"","sources":["../source/PosTaggedCorpus.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,0DAAqD;IACrD,iFAA4E;IAC5E,8DAAyD;IACzD,yBAAyB;IACzB,mDAA8C;IAE9C,MAAa,eAAgB,SAAQ,eAAM;QAIvC;;;WAGG;QACH,YAAY,QAAiB;YACzB,KAAK,EAAE,CAAC;YAPJ,YAAO,GAA2B,IAAI,+BAAc,EAAU,CAAA;YAQlE,IAAI,QAAQ,IAAI,SAAS,EAAE;gBACvB,IAAI,WAAW,GAAG,IAAI,mBAAQ,EAAE,CAAC;gBACjC,IAAI,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;gBAC5C,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;gBAC5B,KAAK,IAAI,IAAI,IAAI,KAAK,EAAE;oBACpB,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBAC7B,KAAK,IAAI,IAAI,IAAI,KAAK,EAAC;wBACnB,IAAI,IAAI,IAAI,EAAE,EAAC;4BACX,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAC;gCACnB,IAAI,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;gCACpD,IAAI,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;gCACpD,IAAI,QAAQ,CAAA;gCACZ,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAC;oCAClB,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;iCACjD;qCAAM;oCACH,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAC;wCAClB,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC;qCACjD;yCAAM;wCACH,QAAQ,GAAG,GAAG,CAAC;qCAClB;iCACJ;gCACD,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gCAC3B,WAAW,CAAC,OAAO,CAAC,IAAI,6BAAa,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC;gCACvD,IAAI,GAAG,IAAI,GAAG,EAAC;oCACX,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;oCAC9B,WAAW,GAAG,IAAI,mBAAQ,EAAE,CAAC;iCAChC;6BACJ;yBACJ;qBACJ;iBACJ;gBACD,IAAI,WAAW,CAAC,SAAS,EAAE,GAAG,CAAC,EAAC;oBAC5B,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;iBACjC;aACJ;QACL,CAAC;QAED;;;;WAIG;QACH,UAAU;YACN,OAAO,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAA;QAC9B,CAAC;KACJ;IAvDD,0CAuDC"}
@@ -0,0 +1,16 @@
1
+ import { Word } from "nlptoolkit-dictionary/dist/Dictionary/Word";
2
+ export declare class PosTaggedWord extends Word {
3
+ private tag; // part-of-speech tag attached to this word
4
+ /**
5
+ * Creates a tagged word: the name is passed on to the Word constructor and the tag is stored on this object.
6
+ * @param name Name of the word
7
+ * @param tag Tag of the word
8
+ */
9
+ constructor(name: string, tag: string);
10
+ /**
11
+ * Accessor for the tag given to the constructor.
12
+ *
13
+ * @returns Tag of the word.
14
+ */
15
+ getTag(): string;
16
+ }
@@ -0,0 +1,35 @@
1
+ (function (factory) {
2
+ if (typeof module === "object" && typeof module.exports === "object") {
3
+ var v = factory(require, exports);
4
+ if (v !== undefined) module.exports = v;
5
+ }
6
+ else if (typeof define === "function" && define.amd) {
7
+ define(["require", "exports", "nlptoolkit-dictionary/dist/Dictionary/Word"], factory);
8
+ }
9
+ })(function (require, exports) {
10
+ "use strict";
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.PosTaggedWord = void 0;
13
+ const Word_1 = require("nlptoolkit-dictionary/dist/Dictionary/Word");
14
class PosTaggedWord extends Word_1.Word {
    /**
     * Creates a word that carries a part-of-speech tag.
     *
     * @param name Name of the word; forwarded to the Word constructor.
     * @param tag Tag of the word.
     */
    constructor(name, tag) {
        super(name);
        this.tag = tag;
    }

    /**
     * @returns The tag given to the constructor.
     */
    getTag() {
        const { tag } = this;
        return tag;
    }
}
33
+ exports.PosTaggedWord = PosTaggedWord;
34
+ });
35
+ //# sourceMappingURL=PosTaggedWord.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PosTaggedWord.js","sourceRoot":"","sources":["../source/PosTaggedWord.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,qEAAgE;IAEhE,MAAa,aAAc,SAAQ,WAAI;QAInC;;;;WAIG;QACH,YAAY,IAAY,EAAE,GAAW;YACjC,KAAK,CAAC,IAAI,CAAC,CAAC;YACZ,IAAI,CAAC,GAAG,GAAG,GAAG,CAAA;QAClB,CAAC;QAED;;;;WAIG;QACH,MAAM;YACF,OAAO,IAAI,CAAC,GAAG,CAAA;QACnB,CAAC;KACJ;IAtBD,sCAsBC"}
@@ -0,0 +1,6 @@
1
+ import { Sentence } from "nlptoolkit-corpus/dist/Sentence";
2
+ import { PosTaggedCorpus } from "./PosTaggedCorpus";
3
+ export interface PosTagger { // contract implemented by DummyPosTagger, HmmPosTagger and NaivePosTagger
4
+ train(corpus: PosTaggedCorpus): void; // prepares the tagger from a tagged corpus
5
+ posTag(sentence: Sentence): Sentence; // returns a tagged sentence built from the given sentence
6
+ }
@@ -0,0 +1,13 @@
1
+ (function (factory) {
2
+ if (typeof module === "object" && typeof module.exports === "object") {
3
+ var v = factory(require, exports);
4
+ if (v !== undefined) module.exports = v;
5
+ }
6
+ else if (typeof define === "function" && define.amd) {
7
+ define(["require", "exports"], factory);
8
+ }
9
+ })(function (require, exports) {
10
+ "use strict";
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ });
13
+ //# sourceMappingURL=PosTagger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PosTagger.js","sourceRoot":"","sources":["../source/PosTagger.ts"],"names":[],"mappings":""}
package/index.js ADDED
@@ -0,0 +1,6 @@
1
+ export * from "./dist/DummyPosTagger"
2
+ export * from "./dist/HmmPosTagger"
3
+ export * from "./dist/NaivePosTagger"
4
+ export * from "./dist/PosTaggedCorpus"
5
+ export * from "./dist/PosTaggedWord"
6
+ export * from "./dist/PosTagger"
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "nlptoolkit-postagger",
3
+ "version": "1.0.0",
4
+ "description": "",
5
+ "main": "index.js",
6
+ "types": "index.js",
7
+ "scripts": {
8
+ "test": "mocha"
9
+ },
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "git+https://github.com/StarlangSoftware/EnglishPosTagger-Js.git"
13
+ },
14
+ "author": "Olcay Taner Yıldız",
15
+ "license": "ISC",
16
+ "bugs": {
17
+ "url": "https://github.com/StarlangSoftware/EnglishPosTagger-Js/issues"
18
+ },
19
+ "homepage": "https://github.com/StarlangSoftware/EnglishPosTagger-Js#readme",
20
+ "devDependencies": {
21
+ "@types/mocha": "^9.0.0",
22
+ "mocha": "^9.1.3",
23
+ "nlptoolkit-corpus": "^1.0.0",
24
+ "nlptoolkit-datastructure": "^1.0.0",
25
+ "nlptoolkit-dictionary": "^1.0.1",
26
+ "nlptoolkit-hmm": "^1.0.0",
27
+ "nlptoolkit-math": "^1.0.0",
28
+ "ts-node": "^10.4.0",
29
+ "typescript": "^4.5.2"
30
+ }
31
+ }
@@ -0,0 +1,38 @@
1
+ import {PosTagger} from "./PosTagger";
2
+ import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
3
+ import {PosTaggedCorpus} from "./PosTaggedCorpus";
4
+ import {PosTaggedWord} from "./PosTaggedWord";
5
+
6
export class DummyPosTagger implements PosTagger {

    /** Tags gathered by {@link train}; {@link posTag} samples from this list. */
    private tagList: string[] = [];

    /**
     * Baseline tagger for testing: pairs every word of the input with the tag
     * stored at a randomly chosen index of the tag list.
     *
     * @param sentence Sentence to be tagged.
     * @returns A new sentence holding one tagged word per input word.
     */
    posTag(sentence: Sentence): Sentence {
        const tagged = new Sentence();
        let position = 0;
        while (position < sentence.wordCount()) {
            const name = sentence.getWord(position).getName();
            const randomIndex = Math.floor(Math.random() * this.tagList.length);
            tagged.addWord(new PosTaggedWord(name, this.tagList[randomIndex]));
            position++;
        }
        return tagged;
    }

    /**
     * Appends every tag reported by the corpus to the stored tag list.
     *
     * @param corpus Training data for the tagger.
     */
    train(corpus: PosTaggedCorpus): void {
        this.tagList.push(...corpus.getTagList());
    }

}
@@ -0,0 +1,51 @@
1
+ import {PosTagger} from "./PosTagger";
2
+ import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
3
+ import {PosTaggedCorpus} from "./PosTaggedCorpus";
4
+ import {Hmm} from "nlptoolkit-hmm/dist/Hmm";
5
+ import {Word} from "nlptoolkit-dictionary/dist/Dictionary/Word";
6
+ import {PosTaggedWord} from "./PosTaggedWord";
7
+ import {Hmm1} from "nlptoolkit-hmm/dist/Hmm1";
8
+
9
export class HmmPosTagger implements PosTagger {

    /** Model built by {@link train}; undefined until training has run. */
    private hmm: Hmm<string, Word> | undefined;

    /**
     * Tags a sentence with the trained model: runs viterbi() over the words of
     * the sentence and pairs each word with the tag at the same index of the
     * returned sequence.
     *
     * @param sentence Sentence to be tagged.
     * @returns A new sentence holding one tagged word per input word.
     * @throws Error if {@link train} has not been called yet.
     */
    posTag(sentence: Sentence): Sentence {
        const model = this.hmm;
        if (model === undefined) {
            // Fail with a clear message instead of a TypeError on `undefined.viterbi`.
            throw new Error("HmmPosTagger.posTag() called before train()");
        }
        const tags = model.viterbi(sentence.getWords());
        const result = new Sentence();
        for (let i = 0; i < sentence.wordCount(); i++) {
            result.addWord(new PosTaggedWord(sentence.getWord(i).getName(), tags[i]));
        }
        return result;
    }

    /**
     * Builds the model from the corpus. For every sentence the tags of its
     * words are collected into one array; the tag set, these per-sentence tag
     * arrays and the corpus words are then handed to Hmm1.
     *
     * @param corpus Training data for the tagger.
     */
    train(corpus: PosTaggedCorpus): void {
        const emittedSymbols: string[][] = [];
        // The bound must be the number of sentences in the corpus. Bounding the
        // loop by emittedSymbols.length (0 for a new array) would skip every
        // sentence and train the model on no tag sequences at all.
        for (let i = 0; i < corpus.sentenceCount(); i++) {
            const sentence = corpus.getSentence(i);
            const sentenceTags: string[] = [];
            for (let j = 0; j < sentence.wordCount(); j++) {
                sentenceTags.push((sentence.getWord(j) as PosTaggedWord).getTag());
            }
            emittedSymbols.push(sentenceTags);
        }
        const tagList = new Set<string>(corpus.getTagList());
        this.hmm = new Hmm1<string, Word>(tagList, emittedSymbols, corpus.getAllWordsAsArrayList());
    }

}
@@ -0,0 +1,53 @@
1
+ import {PosTagger} from "./PosTagger";
2
+ import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
3
+ import {PosTaggedCorpus} from "./PosTaggedCorpus";
4
+ import {PosTaggedWord} from "./PosTaggedWord";
5
+ import {CounterHashMap} from "nlptoolkit-datastructure/dist/CounterHashMap";
6
+
7
export class NaivePosTagger implements PosTagger {

    /** Word name -> tag chosen for that word; assigned by {@link train}. */
    private maxMap: Map<string, string>

    /**
     * Tags each word with the tag stored for its name by train(). A word that
     * was not seen during training gets an undefined tag.
     *
     * @param sentence Sentence to be tagged.
     * @returns A new sentence holding one tagged word per input word.
     */
    posTag(sentence: Sentence): Sentence {
        const tagged = new Sentence();
        for (let position = 0; position < sentence.wordCount(); position++) {
            const name = sentence.getWord(position).getName();
            tagged.addWord(new PosTaggedWord(name, this.maxMap.get(name)));
        }
        return tagged;
    }

    /**
     * Counts how often each tag occurs with each word name in the corpus and
     * keeps, per word, the tag returned by max() of its counter.
     *
     * @param corpus Training data for the tagger.
     */
    train(corpus: PosTaggedCorpus): void {
        // word name -> counter of the tags observed with that word
        const tagCounts = new Map<string, CounterHashMap<string>>();
        for (let i = 0; i < corpus.sentenceCount(); i++) {
            const current = corpus.getSentence(i);
            for (let j = 0; j < current.wordCount(); j++) {
                const word = current.getWord(j) as PosTaggedWord;
                const name = word.getName();
                let counter = tagCounts.get(name);
                if (counter === undefined) {
                    counter = new CounterHashMap<string>();
                    tagCounts.set(name, counter);
                }
                counter.put(word.getTag());
            }
        }
        this.maxMap = new Map<string, string>();
        tagCounts.forEach((counter, name) => {
            this.maxMap.set(name, counter.max());
        });
    }

}