nlptoolkit-postagger 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -0
- package/brown.txt +89329 -0
- package/dist/DummyPosTagger.d.ts +20 -0
- package/dist/DummyPosTagger.js +47 -0
- package/dist/DummyPosTagger.js.map +1 -0
- package/dist/HmmPosTagger.d.ts +21 -0
- package/dist/HmmPosTagger.js +56 -0
- package/dist/HmmPosTagger.js.map +1 -0
- package/dist/NaivePosTagger.d.ts +21 -0
- package/dist/NaivePosTagger.js +61 -0
- package/dist/NaivePosTagger.js.map +1 -0
- package/dist/PosTaggedCorpus.d.ts +15 -0
- package/dist/PosTaggedCorpus.js +75 -0
- package/dist/PosTaggedCorpus.js.map +1 -0
- package/dist/PosTaggedWord.d.ts +16 -0
- package/dist/PosTaggedWord.js +35 -0
- package/dist/PosTaggedWord.js.map +1 -0
- package/dist/PosTagger.d.ts +6 -0
- package/dist/PosTagger.js +13 -0
- package/dist/PosTagger.js.map +1 -0
- package/index.js +6 -0
- package/package.json +31 -0
- package/source/DummyPosTagger.ts +38 -0
- package/source/HmmPosTagger.ts +51 -0
- package/source/NaivePosTagger.ts +53 -0
- package/source/PosTaggedCorpus.ts +62 -0
- package/source/PosTaggedWord.ts +25 -0
- package/source/PosTagger.ts +9 -0
- package/source/tsconfig.json +13 -0
- package/tests/DummyPosTaggerTest.ts +26 -0
- package/tests/HmmPosTaggerTest.ts +26 -0
- package/tests/NaivePosTaggerTest.ts +26 -0
- package/tsconfig.json +15 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import {Corpus} from "nlptoolkit-corpus/dist/Corpus";
|
|
2
|
+
import {CounterHashMap} from "nlptoolkit-datastructure/dist/CounterHashMap";
|
|
3
|
+
import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
|
|
4
|
+
import * as fs from "fs";
|
|
5
|
+
import {PosTaggedWord} from "./PosTaggedWord";
|
|
6
|
+
|
|
7
|
+
export class PosTaggedCorpus extends Corpus {

    private tagList: CounterHashMap<string> = new CounterHashMap<string>()

    /**
     * Creates a {@link PosTaggedCorpus}. When a file name is supplied, the file is read as UTF-8 text and
     * split into whitespace-separated tokens. Every token containing a "/" is divided at its last "/" into
     * a word and a tag; tokens without a "/" are skipped. The tag is shortened to the part before its first
     * "+", or, when it has no "+", to the part before its first "-". Each shortened tag is recorded in the
     * tag list and each word is appended to the current sentence as a {@link PosTaggedWord}. A token whose
     * full (unshortened) tag is exactly "." closes the current sentence.
     * @param fileName Optional path of the tagged text file to load. When omitted, the corpus stays empty.
     */
    constructor(fileName?: string) {
        super();
        if (fileName == undefined) {
            return;
        }
        let current = new Sentence();
        const contents = fs.readFileSync(fileName, 'utf8')
        for (const row of contents.split("\n")) {
            for (const token of row.split(/\s/)) {
                // An empty token contains no "/", so this single check also skips blanks.
                if (!token.includes("/")) {
                    continue;
                }
                const slash = token.lastIndexOf('/');
                const name = token.substring(0, slash);
                const tag = token.substring(slash + 1);
                // "+" takes precedence over "-" when deciding where to cut the tag.
                let cut = tag.indexOf("+");
                if (cut == -1) {
                    cut = tag.indexOf("-");
                }
                const shortTag = cut == -1 ? tag : tag.substring(0, cut);
                this.tagList.put(shortTag);
                current.addWord(new PosTaggedWord(name, shortTag));
                // The sentence boundary is decided on the full tag, not on the shortened one.
                if (tag == ".") {
                    this.addSentence(current);
                    current = new Sentence();
                }
            }
        }
        // Keep any trailing words that were not terminated by a "." tag.
        if (current.wordCount() > 0) {
            this.addSentence(current);
        }
    }

    /**
     * Accessor for the tags collected while loading the corpus.
     *
     * @return Iterator over the keys of the tag list, i.e. the shortened tags recorded by the constructor.
     */
    getTagList(): IterableIterator<string> {
        return this.tagList.keys()
    }
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import {Word} from "nlptoolkit-dictionary/dist/Dictionary/Word";
|
|
2
|
+
|
|
3
|
+
export class PosTaggedWord extends Word {

    /**
     * Creates a {@link PosTaggedWord} from a word and its tag. The name is handed to the {@link Word}
     * constructor and the tag is stored on this instance.
     * @param name Name of the word
     * @param tag Tag of the word
     */
    constructor(name: string, private tag: string) {
        super(name);
    }

    /**
     * Getter for the tag given at construction time.
     *
     * @return Tag of the word.
     */
    getTag(): string {
        return this.tag
    }
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import * as assert from "assert";
|
|
2
|
+
import {DummyPosTagger} from "../dist/DummyPosTagger";
|
|
3
|
+
import {PosTaggedCorpus} from "../dist/PosTaggedCorpus";
|
|
4
|
+
import {PosTaggedWord} from "../dist/PosTaggedWord";
|
|
5
|
+
|
|
6
|
+
// Trains a DummyPosTagger on the corpus loaded from "brown.txt", re-tags every sentence of that same
// corpus, and checks the percentage of words whose predicted tag equals the corpus tag.
describe('DummyPosTaggerTest', function() {
    describe('DummyPosTaggerTest', function() {
        it('testPosTag', function() {
            const posTagger = new DummyPosTagger();
            const posTaggedCorpus = new PosTaggedCorpus("brown.txt");
            posTagger.train(posTaggedCorpus);
            let correct = 0;
            let incorrect = 0;
            for (let i = 0; i < posTaggedCorpus.sentenceCount(); i++) {
                const goldSentence = posTaggedCorpus.getSentence(i);
                const taggedSentence = posTagger.posTag(goldSentence);
                for (let j = 0; j < taggedSentence.wordCount(); j++) {
                    const expectedTag = (goldSentence.getWord(j) as PosTaggedWord).getTag();
                    const predictedTag = (taggedSentence.getWord(j) as PosTaggedWord).getTag();
                    if (expectedTag == predictedTag) {
                        correct++;
                    } else {
                        incorrect++;
                    }
                }
            }
            // Accuracy as a percentage; it must be within 0.01 of 0.88.
            const accuracy = 100 * correct / (correct + incorrect);
            assert.ok(Math.abs(accuracy - 0.88) < 0.01);
        });
    });
});
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import * as assert from "assert";
|
|
2
|
+
import {PosTaggedCorpus} from "../dist/PosTaggedCorpus";
|
|
3
|
+
import {PosTaggedWord} from "../dist/PosTaggedWord";
|
|
4
|
+
import {HmmPosTagger} from "../dist/HmmPosTagger";
|
|
5
|
+
|
|
6
|
+
// Trains an HmmPosTagger on the corpus loaded from "brown.txt", re-tags every sentence of that same
// corpus, and checks the percentage of words whose predicted tag equals the corpus tag.
describe('HmmPosTaggerTest', function() {
    describe('HmmPosTaggerTest', function() {
        it('testPosTag', function() {
            const posTagger = new HmmPosTagger();
            const posTaggedCorpus = new PosTaggedCorpus("brown.txt");
            posTagger.train(posTaggedCorpus);
            let correct = 0;
            let incorrect = 0;
            for (let i = 0; i < posTaggedCorpus.sentenceCount(); i++) {
                const goldSentence = posTaggedCorpus.getSentence(i);
                const taggedSentence = posTagger.posTag(goldSentence);
                for (let j = 0; j < taggedSentence.wordCount(); j++) {
                    const expectedTag = (goldSentence.getWord(j) as PosTaggedWord).getTag();
                    const predictedTag = (taggedSentence.getWord(j) as PosTaggedWord).getTag();
                    if (expectedTag == predictedTag) {
                        correct++;
                    } else {
                        incorrect++;
                    }
                }
            }
            // Accuracy as a percentage; it must be within 0.01 of 97.59.
            const accuracy = 100 * correct / (correct + incorrect);
            assert.ok(Math.abs(accuracy - 97.59) < 0.01);
        });
    });
});
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import * as assert from "assert";
|
|
2
|
+
import {PosTaggedCorpus} from "../dist/PosTaggedCorpus";
|
|
3
|
+
import {PosTaggedWord} from "../dist/PosTaggedWord";
|
|
4
|
+
import {NaivePosTagger} from "../dist/NaivePosTagger";
|
|
5
|
+
|
|
6
|
+
// Trains a NaivePosTagger on the corpus loaded from "brown.txt", re-tags every sentence of that same
// corpus, and checks the percentage of words whose predicted tag equals the corpus tag.
describe('NaivePosTaggerTest', function() {
    describe('NaivePosTaggerTest', function() {
        it('testPosTag', function() {
            const posTagger = new NaivePosTagger();
            const posTaggedCorpus = new PosTaggedCorpus("brown.txt");
            posTagger.train(posTaggedCorpus);
            let correct = 0;
            let incorrect = 0;
            for (let i = 0; i < posTaggedCorpus.sentenceCount(); i++) {
                const goldSentence = posTaggedCorpus.getSentence(i);
                const taggedSentence = posTagger.posTag(goldSentence);
                for (let j = 0; j < taggedSentence.wordCount(); j++) {
                    const expectedTag = (goldSentence.getWord(j) as PosTaggedWord).getTag();
                    const predictedTag = (taggedSentence.getWord(j) as PosTaggedWord).getTag();
                    if (expectedTag == predictedTag) {
                        correct++;
                    } else {
                        incorrect++;
                    }
                }
            }
            // Accuracy as a percentage; it must be within 0.01 of 93.69.
            const accuracy = 100 * correct / (correct + incorrect);
            assert.ok(Math.abs(accuracy - 93.69) < 0.01);
        });
    });
});
|
package/tsconfig.json
ADDED