nlptoolkit-universaldependencyparser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Agenda.d.ts +24 -0
- package/dist/Agenda.js +72 -0
- package/dist/Agenda.js.map +1 -0
- package/dist/ArcEagerInstanceGenerator.d.ts +20 -0
- package/dist/ArcEagerInstanceGenerator.js +81 -0
- package/dist/ArcEagerInstanceGenerator.js.map +1 -0
- package/dist/ArcEagerOracle.d.ts +9 -0
- package/dist/ArcEagerOracle.js +39 -0
- package/dist/ArcEagerOracle.js.map +1 -0
- package/dist/ArcEagerTransitionParser.d.ts +9 -0
- package/dist/ArcEagerTransitionParser.js +113 -0
- package/dist/ArcEagerTransitionParser.js.map +1 -0
- package/dist/ArcStandardOracle.d.ts +9 -0
- package/dist/ArcStandardOracle.js +36 -0
- package/dist/ArcStandardOracle.js.map +1 -0
- package/dist/ArcStandardTransitionParser.d.ts +28 -0
- package/dist/ArcStandardTransitionParser.js +129 -0
- package/dist/ArcStandardTransitionParser.js.map +1 -0
- package/dist/Candidate.d.ts +8 -0
- package/dist/Candidate.js +27 -0
- package/dist/Candidate.js.map +1 -0
- package/dist/Command.d.ts +6 -0
- package/dist/Command.js +8 -0
- package/dist/Command.js.map +1 -0
- package/dist/Decision.d.ts +7 -0
- package/dist/Decision.js +25 -0
- package/dist/Decision.js.map +1 -0
- package/dist/InstanceGenerator.d.ts +33 -0
- package/dist/InstanceGenerator.js +77 -0
- package/dist/InstanceGenerator.js.map +1 -0
- package/dist/Oracle.d.ts +49 -0
- package/dist/Oracle.js +115 -0
- package/dist/Oracle.js.map +1 -0
- package/dist/RandomOracle.d.ts +14 -0
- package/dist/RandomOracle.js +45 -0
- package/dist/RandomOracle.js.map +1 -0
- package/dist/RandomScoringOracle.d.ts +5 -0
- package/dist/RandomScoringOracle.js +21 -0
- package/dist/RandomScoringOracle.js.map +1 -0
- package/dist/ScoringOracle.d.ts +4 -0
- package/dist/ScoringOracle.js +17 -0
- package/dist/ScoringOracle.js.map +1 -0
- package/dist/SimpleInstanceGenerator.d.ts +14 -0
- package/dist/SimpleInstanceGenerator.js +67 -0
- package/dist/SimpleInstanceGenerator.js.map +1 -0
- package/dist/StackRelation.d.ts +10 -0
- package/dist/StackRelation.js +30 -0
- package/dist/StackRelation.js.map +1 -0
- package/dist/StackWord.d.ts +11 -0
- package/dist/StackWord.js +42 -0
- package/dist/StackWord.js.map +1 -0
- package/dist/State.d.ts +25 -0
- package/dist/State.js +161 -0
- package/dist/State.js.map +1 -0
- package/dist/TransitionParser.d.ts +73 -0
- package/dist/TransitionParser.js +164 -0
- package/dist/TransitionParser.js.map +1 -0
- package/dist/TransitionSystem.d.ts +4 -0
- package/dist/TransitionSystem.js +6 -0
- package/dist/TransitionSystem.js.map +1 -0
- package/index.js +20 -0
- package/models/en_atis_eager_c45_2.txt +1903 -0
- package/models/en_atis_eager_c45_3.txt +1918 -0
- package/models/en_atis_standard_c45_2.txt +1948 -0
- package/models/en_atis_standard_c45_3.txt +2233 -0
- package/models/tr_atis_eager_c45_2.txt +2208 -0
- package/models/tr_atis_eager_c45_3.txt +2303 -0
- package/models/tr_atis_standard_c45_2.txt +2863 -0
- package/models/tr_atis_standard_c45_3.txt +3413 -0
- package/models/tr_boun_eager_c45_2.txt +1613 -0
- package/models/tr_boun_eager_c45_3.txt +1818 -0
- package/models/tr_boun_standard_c45_2.txt +898 -0
- package/models/tr_boun_standard_c45_3.txt +1138 -0
- package/models/tr_framenet_eager_c45_2.txt +453 -0
- package/models/tr_framenet_eager_c45_3.txt +668 -0
- package/models/tr_framenet_standard_c45_2.txt +363 -0
- package/models/tr_framenet_standard_c45_3.txt +238 -0
- package/models/tr_kenet_eager_c45_2.txt +1788 -0
- package/models/tr_kenet_eager_c45_3.txt +1588 -0
- package/models/tr_kenet_standard_c45_2.txt +1308 -0
- package/models/tr_kenet_standard_c45_3.txt +1378 -0
- package/models/tr_penn_eager_c45_2.txt +2788 -0
- package/models/tr_penn_eager_c45_3.txt +1718 -0
- package/models/tr_penn_standard_c45_2.txt +2203 -0
- package/models/tr_penn_standard_c45_3.txt +1493 -0
- package/models/tr_tourism_eager_c45_2.txt +3468 -0
- package/models/tr_tourism_eager_c45_3.txt +4213 -0
- package/models/tr_tourism_standard_c45_2.txt +3868 -0
- package/models/tr_tourism_standard_c45_3.txt +4728 -0
- package/package.json +30 -0
- package/source/Parser/TransitionBasedParser/Agenda.ts +66 -0
- package/source/Parser/TransitionBasedParser/ArcEagerInstanceGenerator.ts +73 -0
- package/source/Parser/TransitionBasedParser/ArcEagerOracle.ts +30 -0
- package/source/Parser/TransitionBasedParser/ArcEagerTransitionParser.ts +105 -0
- package/source/Parser/TransitionBasedParser/ArcStandardOracle.ts +28 -0
- package/source/Parser/TransitionBasedParser/ArcStandardTransitionParser.ts +123 -0
- package/source/Parser/TransitionBasedParser/Candidate.ts +20 -0
- package/source/Parser/TransitionBasedParser/Command.ts +3 -0
- package/source/Parser/TransitionBasedParser/Decision.ts +16 -0
- package/source/Parser/TransitionBasedParser/InstanceGenerator.ts +85 -0
- package/source/Parser/TransitionBasedParser/Oracle.ts +123 -0
- package/source/Parser/TransitionBasedParser/RandomOracle.ts +37 -0
- package/source/Parser/TransitionBasedParser/RandomScoringOracle.ts +10 -0
- package/source/Parser/TransitionBasedParser/ScoringOracle.ts +5 -0
- package/source/Parser/TransitionBasedParser/SimpleInstanceGenerator.ts +57 -0
- package/source/Parser/TransitionBasedParser/StackRelation.ts +27 -0
- package/source/Parser/TransitionBasedParser/StackWord.ts +38 -0
- package/source/Parser/TransitionBasedParser/State.ts +168 -0
- package/source/Parser/TransitionBasedParser/TransitionParser.ts +187 -0
- package/source/Parser/TransitionBasedParser/TransitionSystem.ts +3 -0
- package/source/tsconfig.json +13 -0
- package/tsconfig.json +15 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import {Model} from "nlptoolkit-classification/dist/Model/Model";
|
|
2
|
+
import {Decision} from "./Decision";
|
|
3
|
+
import {State} from "./State";
|
|
4
|
+
import {Candidate} from "./Candidate";
|
|
5
|
+
import {UniversalDependencyRelation} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyRelation";
|
|
6
|
+
import {UniversalDependencyType} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyType";
|
|
7
|
+
|
|
8
|
+
/**
 * Base class for the oracles that pick the next transition of the dependency parser.
 * It stores the classification model and the window size, and offers helpers that turn the
 * model's per-class scores into a valid action for the current parser {@link State}.
 */
export abstract class Oracle {

    protected commandModel: Model
    protected windowSize: number

    /**
     * Constructs an Oracle with the given model and window size.
     * @param model the model used for making predictions
     * @param windowSize the size of the window used in parsing
     */
    protected constructor(model: Model, windowSize: number) {
        this.commandModel = model
        this.windowSize = windowSize
    }

    /**
     * Abstract method to be implemented by subclasses to make a parsing decision based on the current state.
     * @param state the current parsing state
     * @return a {@link Decision} object representing the action to be taken
     */
    abstract makeDecision(state: State): Decision

    /**
     * Abstract method to be implemented by subclasses to score potential decisions based on the current state
     * and transition system.
     * @param state the current parsing state
     * @param transitionSystem the transition system being used (e.g., ARC_STANDARD or ARC_EAGER)
     * @return a list of {@link Decision} objects, each with a score indicating its suitability
     */
    protected abstract scoreDecisions(state: State, transitionSystem: TransitionSystem): Array<Decision>

    /**
     * Finds the best valid parsing action for the ARC_EAGER transition system based on probabilities.
     * An action is only accepted when it is applicable in the current state:
     * "SHIFT" and "RIGHTARC" need a non-empty word list; every other action needs more than one stack
     * entry, and "REDUCE" additionally needs the top of the stack to already carry a relation.
     * Only actions with a probability strictly greater than 0 (and greater than the best so far) are considered.
     * @param probabilities a map of actions to their associated probabilities
     * @param state the current parsing state
     * @return the best action as a string, or an empty string if no valid action is found
     */
    protected findBestValidEagerClassInfo(probabilities: Map<string, number>, state: State): string{
        let bestValue = 0.0
        let best = ""
        // A Map exposes no enumerable own properties, so `for...in` would never visit an entry;
        // the entries have to be walked with `for...of`.
        for (const [key, probability] of probabilities){
            if (probability > bestValue){
                if (key == "SHIFT" || key == "RIGHTARC"){
                    if (state.wordListSize() > 0){
                        best = key
                        bestValue = probability
                    }
                } else {
                    if (state.stackSize() > 1){
                        // REDUCE is rejected while the top of the stack has no relation yet.
                        if (!(key == "REDUCE" && state.getPeek().getRelation() == null)){
                            best = key
                            bestValue = probability
                        }
                    }
                }
            }
        }
        return best
    }

    /**
     * Finds the best valid parsing action for the ARC_STANDARD transition system based on probabilities.
     * "SHIFT" needs a non-empty word list; every other action needs more than one stack entry.
     * Only actions with a probability strictly greater than 0 (and greater than the best so far) are considered.
     * @param probabilities a map of actions to their associated probabilities
     * @param state the current parsing state
     * @return the best action as a string, or an empty string if no valid action is found
     */
    protected findBestValidStandardClassInfo(probabilities: Map<string, number>, state: State): string{
        let bestValue = 0.0
        let best = ""
        // See findBestValidEagerClassInfo: Map entries must be iterated with `for...of`.
        for (const [key, probability] of probabilities){
            if (probability > bestValue){
                if (key == "SHIFT"){
                    if (state.wordListSize() > 0){
                        best = key
                        bestValue = probability
                    }
                } else {
                    if (state.stackSize() > 1){
                        best = key
                        bestValue = probability
                    }
                }
            }
        }
        return best
    }

    /**
     * Converts a string representation of the best action into a {@link Candidate} object.
     * The text before the first '(' is the command; the text between the first '(' and the first ')'
     * is resolved with UniversalDependencyRelation.getDependencyTag. Without parentheses the dependency
     * type falls back to UniversalDependencyType.DEP.
     * @param best the best action represented as a string, possibly with a dependency type in parentheses
     * @return a {@link Candidate} object representing the action, or null if the action is unknown
     */
    protected getDecisionCandidate(best: string): Candidate{
        let command: string, type
        if (best.includes("(")){
            command = best.substring(0, best.indexOf('('))
            let relation = best.substring(best.indexOf('(') + 1, best.indexOf(')'))
            type = UniversalDependencyRelation.getDependencyTag(relation)
        } else {
            command = best
            type = UniversalDependencyType.DEP
        }
        switch (command){
            case "SHIFT":
                return new Candidate(Command.SHIFT, type)
            case "REDUCE":
                return new Candidate(Command.REDUCE, type)
            case "LEFTARC":
                return new Candidate(Command.LEFTARC, type)
            case "RIGHTARC":
                return new Candidate(Command.RIGHTARC, type)
        }
        return null
    }
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import {Oracle} from "./Oracle";
|
|
2
|
+
import {State} from "./State";
|
|
3
|
+
import {Decision} from "./Decision";
|
|
4
|
+
import {Model} from "nlptoolkit-classification/dist/Model/Model";
|
|
5
|
+
import {UniversalDependencyType} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyType";
|
|
6
|
+
import {UniversalDependencyRelation} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyRelation";
|
|
7
|
+
|
|
8
|
+
/**
 * Oracle that ignores the parser state and draws its action at random.
 */
export class RandomOracle extends Oracle{

    /**
     * Creates a random oracle; both arguments are simply forwarded to {@link Oracle}.
     * @param model the model handed to the base class
     * @param windowSize the window size handed to the base class
     */
    constructor(model: Model, windowSize: number) {
        super(model, windowSize)
    }

    /**
     * Draws one of three commands (LEFTARC, RIGHTARC, SHIFT) uniformly at random. For the two arc
     * commands the dependency type is the entry of UniversalDependencyRelation.universalDependencyTags
     * at a random index in [0, 58); SHIFT always carries UniversalDependencyType.DEP.
     * The third Decision argument is always 0.
     * @param state The current state of the parser (not consulted).
     * @return A Decision object representing the randomly chosen action.
     */
    public makeDecision(state: State): Decision {
        // Both draws are always made, command first, so the random sequence is consumed
        // the same way regardless of which command is picked.
        const commandDraw = Math.floor(Math.random() * 3)
        const relationDraw = Math.floor(Math.random() * 58)
        if (commandDraw === 2) {
            return new Decision(Command.SHIFT, UniversalDependencyType.DEP, 0)
        }
        if (commandDraw === 0 || commandDraw === 1) {
            const arcCommand = commandDraw === 0 ? Command.LEFTARC : Command.RIGHTARC
            return new Decision(arcCommand, UniversalDependencyRelation.universalDependencyTags[relationDraw], 0)
        }
        return null
    }

    /**
     * Scoring is not supported by the random oracle.
     * @param state the current parsing state (unused)
     * @param transitionSystem the transition system in use (unused)
     * @return always null
     */
    protected scoreDecisions(state: State, transitionSystem: TransitionSystem): Array<Decision> {
        return null;
    }

}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import {InstanceGenerator} from "./InstanceGenerator";
|
|
2
|
+
import {State} from "./State";
|
|
3
|
+
import {Instance} from "nlptoolkit-classification/dist/Instance/Instance";
|
|
4
|
+
import {DiscreteIndexedAttribute} from "nlptoolkit-classification/dist/Attribute/DiscreteIndexedAttribute";
|
|
5
|
+
import {
|
|
6
|
+
UniversalDependencyTreeBankFeatures
|
|
7
|
+
} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankFeatures";
|
|
8
|
+
import {Attribute} from "nlptoolkit-classification/dist/Attribute/Attribute";
|
|
9
|
+
|
|
10
|
+
/**
 * Instance generator that describes a parser state by the part-of-speech and feature attributes
 * of the first `windowSize` stack words followed by the first `windowSize` word-list words.
 */
export class SimpleInstanceGenerator extends InstanceGenerator{

    constructor() {
        super();
    }

    /**
     * Generates an instance based on the state, window size, and command.
     * Each window position contributes one DiscreteIndexedAttribute followed by either the word's
     * feature attributes or the empty placeholders added by addEmptyAttributes.
     * @param state The current state of the parser, which includes the stack and word list.
     * @param windowSize The size of the window used for feature extraction.
     * @param command The command to be associated with the generated instance.
     * @return The generated {@link Instance} object with attributes based on the state and command.
     */
    generate(state: State, windowSize: number, command: string): Instance {
        const result = new Instance(command)
        const collected = new Array<Attribute>()
        // Stack window: a missing word is labelled "null", the root word "root"; both use index 0.
        for (let depth = 0; depth < windowSize; depth++) {
            const stackWord = state.getStackWord(depth);
            if (stackWord != null && !(stackWord.getName() == "root")) {
                collected.push(new DiscreteIndexedAttribute(stackWord.getUpos().toString(), UniversalDependencyTreeBankFeatures.posIndex(stackWord.getUpos().toString()) + 1, 18))
                this.addFeatureAttributes(stackWord, collected)
                continue
            }
            const placeholderName = stackWord == null ? "null" : "root"
            collected.push(new DiscreteIndexedAttribute(placeholderName, 0, 18))
            this.addEmptyAttributes(collected)
        }
        // Word-list window: a missing word is labelled "root" here, again with index 0.
        for (let offset = 0; offset < windowSize; offset++) {
            const upcoming = state.getWordListWord(offset)
            if (upcoming == null) {
                collected.push(new DiscreteIndexedAttribute("root", 0, 18))
                this.addEmptyAttributes(collected)
                continue
            }
            collected.push(new DiscreteIndexedAttribute(upcoming.getUpos().toString(), UniversalDependencyTreeBankFeatures.posIndex(upcoming.getUpos().toString()) + 1, 18))
            this.addFeatureAttributes(upcoming, collected)
        }
        for (const attribute of collected) {
            result.addAttribute(attribute)
        }
        return result
    }

}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import {
|
|
2
|
+
UniversalDependencyTreeBankWord
|
|
3
|
+
} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankWord";
|
|
4
|
+
import {UniversalDependencyRelation} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyRelation";
|
|
5
|
+
|
|
6
|
+
/**
 * Immutable pair of a word and the dependency relation that was assigned to it.
 */
export class StackRelation {

    /**
     * @param word the word the relation belongs to
     * @param relation the relation assigned to the word
     */
    constructor(private readonly word: UniversalDependencyTreeBankWord,
                private readonly relation: UniversalDependencyRelation) {
    }

    /**
     * Copies this pair. The word is duplicated through its own clone(); the relation object is
     * shared with the original, not copied.
     * @return a new StackRelation
     */
    clone(): StackRelation{
        const wordCopy = this.word.clone()
        return new StackRelation(wordCopy, this.relation)
    }

    /**
     * @return the word of this pair
     */
    getWord(): UniversalDependencyTreeBankWord{
        return this.word
    }

    /**
     * @return the relation of this pair
     */
    getRelation(): UniversalDependencyRelation{
        return this.relation
    }
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import {
|
|
2
|
+
UniversalDependencyTreeBankWord
|
|
3
|
+
} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankWord";
|
|
4
|
+
|
|
5
|
+
/**
 * A word held on the parser's stack or word list, together with a numeric index (`toWord`)
 * that callers use as the head index when attaching other words to this one.
 */
export class StackWord {

    private word: UniversalDependencyTreeBankWord
    private toWord: number

    /**
     * Initialiser used when no word is supplied: stores a default-constructed
     * UniversalDependencyTreeBankWord and index 0.
     * NOTE(review): the default-constructed word is presumably the artificial root — confirm.
     */
    constructor1() {
        this.word = new UniversalDependencyTreeBankWord()
        // Without this assignment toWord stayed undefined and leaked into head indices.
        this.toWord = 0
    }

    /**
     * Initialiser used when a word is supplied.
     * @param word the wrapped word
     * @param toWord the index associated with the word; 0 when omitted
     */
    constructor2(word: UniversalDependencyTreeBankWord, toWord: number){
        this.word = word
        this.toWord = toWord === undefined ? 0 : toWord
    }

    /**
     * Copies this entry: the word is duplicated through its own clone(), the index is kept.
     * @return a new StackWord
     */
    clone(): StackWord{
        return new StackWord(this.word.clone(), this.toWord)
    }

    /**
     * Creates a stack entry. With no word the entry wraps a default-constructed word and index 0;
     * otherwise it wraps the given word and index.
     * @param word the word to wrap (optional)
     * @param toWord the index associated with the word (optional, defaults to 0)
     */
    constructor(word?: UniversalDependencyTreeBankWord, toWord?: number) {
        if (word == undefined){
            this.constructor1()
        } else {
            this.constructor2(word, toWord)
        }
    }

    /**
     * @return the wrapped word
     */
    getWord(): UniversalDependencyTreeBankWord{
        return this.word
    }

    /**
     * @return the index associated with the wrapped word
     */
    getToWord(): number{
        return this.toWord
    }
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import {StackWord} from "./StackWord";
|
|
2
|
+
import {StackRelation} from "./StackRelation";
|
|
3
|
+
import {UniversalDependencyType} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyType";
|
|
4
|
+
import {UniversalDependencyRelation} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyRelation";
|
|
5
|
+
import {
|
|
6
|
+
UniversalDependencyTreeBankWord
|
|
7
|
+
} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankWord";
|
|
8
|
+
|
|
9
|
+
/**
 * Parser configuration of the transition-based parser: a stack of words, the list of words that
 * still have to be read, and the relations produced so far. The apply* methods mutate the
 * configuration in place.
 */
export class State {

    private stack: Array<StackWord>
    private wordList: Array<StackWord>
    private relations: Array<StackRelation>

    /**
     * @param stack the initial stack (last element is the top)
     * @param wordList the words still to be processed (first element is next)
     * @param relations the relations produced so far
     */
    constructor(stack: Array<StackWord>, wordList: Array<StackWord>, relations: Array<StackRelation>){
        this.stack = stack
        this.wordList = wordList
        this.relations = relations
    }

    /**
     * Builds a relation pointing at `head`. The label is the type's string form with its first
     * underscore replaced by a colon.
     */
    private static relationTo(head: number, type: UniversalDependencyType): UniversalDependencyRelation{
        return new UniversalDependencyRelation(head, type.toString().replace("_", ":"))
    }

    /**
     * Moves the first word of the word list onto the stack; does nothing when the list is empty.
     */
    public applyShift(){
        if (this.wordList.length === 0){
            return
        }
        this.stack.push(this.wordList.shift())
    }

    /**
     * Arc-standard LEFTARC: the second-from-top stack word becomes a dependent of the top word and
     * is removed from the stack. Needs at least two stack entries.
     * @param type the dependency type of the new relation
     */
    public applyLeftArc(type: UniversalDependencyType){
        const depth = this.stack.length
        if (depth <= 1){
            return
        }
        const dependent = this.stack[depth - 2].getWord()
        const headIndex = this.stack[depth - 1].getToWord()
        dependent.setRelation(State.relationTo(headIndex, type))
        this.stack.splice(depth - 2, 1)
        // The recorded relation is a separate object from the one stored on the word.
        this.relations.push(new StackRelation(dependent, State.relationTo(headIndex, type)))
    }

    /**
     * Arc-standard RIGHTARC: the top stack word becomes a dependent of the second-from-top word and
     * is popped. Needs at least two stack entries.
     * @param type the dependency type of the new relation
     */
    public applyRightArc(type: UniversalDependencyType){
        const depth = this.stack.length
        if (depth <= 1){
            return
        }
        const dependent = this.stack[depth - 1].getWord()
        const headIndex = this.stack[depth - 2].getToWord()
        dependent.setRelation(State.relationTo(headIndex, type))
        this.stack.pop()
        this.relations.push(new StackRelation(dependent, State.relationTo(headIndex, type)))
    }

    /**
     * Arc-eager LEFTARC: the top stack word becomes a dependent of the first word of the word list
     * and is popped. Needs a non-empty stack and a non-empty word list.
     * @param type the dependency type of the new relation
     */
    public applyArcEagerLeftArc(type: UniversalDependencyType){
        if (this.stack.length === 0 || this.wordList.length === 0){
            return
        }
        const dependent = this.stack[this.stack.length - 1].getWord()
        const headIndex = this.wordList[0].getToWord()
        dependent.setRelation(State.relationTo(headIndex, type))
        this.stack.pop()
        this.relations.push(new StackRelation(dependent, State.relationTo(headIndex, type)))
    }

    /**
     * Arc-eager RIGHTARC: the first word of the word list becomes a dependent of the top stack word
     * and is then shifted onto the stack. Needs a non-empty stack and a non-empty word list.
     * @param type the dependency type of the new relation
     */
    public applyArcEagerRightArc(type: UniversalDependencyType){
        if (this.stack.length === 0 || this.wordList.length === 0){
            return
        }
        const dependent = this.wordList[0].getWord()
        const headIndex = this.stack[this.stack.length - 1].getToWord()
        dependent.setRelation(State.relationTo(headIndex, type))
        this.applyShift()
        this.relations.push(new StackRelation(dependent, State.relationTo(headIndex, type)))
    }

    /**
     * Pops the top of the stack; does nothing on an empty stack.
     */
    public applyReduce(){
        if (this.stack.length === 0){
            return
        }
        this.stack.pop()
    }

    /**
     * Dispatches a command to the matching apply* method of the given transition system.
     * Combinations that are not listed (for example REDUCE under ARC_STANDARD) are ignored.
     * @param command the command to execute
     * @param type the dependency type used by the arc commands
     * @param transitionSystem ARC_STANDARD or ARC_EAGER
     */
    public apply(command: Command, type: UniversalDependencyType, transitionSystem: TransitionSystem){
        if (transitionSystem === TransitionSystem.ARC_STANDARD){
            if (command === Command.LEFTARC){
                this.applyLeftArc(type)
            } else if (command === Command.RIGHTARC){
                this.applyRightArc(type)
            } else if (command === Command.SHIFT){
                this.applyShift()
            }
        } else if (transitionSystem === TransitionSystem.ARC_EAGER){
            if (command === Command.LEFTARC){
                this.applyArcEagerLeftArc(type)
            } else if (command === Command.RIGHTARC){
                this.applyArcEagerRightArc(type)
            } else if (command === Command.SHIFT){
                this.applyShift()
            } else if (command === Command.REDUCE){
                this.applyReduce()
            }
        }
    }

    /**
     * @return the number of relations produced so far
     */
    public relationSize(): number{
        return this.relations.length
    }

    /**
     * @return the number of words still in the word list
     */
    public wordListSize(): number{
        return this.wordList.length
    }

    /**
     * @return the number of entries on the stack
     */
    public stackSize(): number{
        return this.stack.length
    }

    /**
     * Returns the word `index` positions below the top of the stack (0 is the top).
     * @param index distance from the top of the stack
     * @return the word, or null when the stack is not that deep
     */
    public getStackWord(index: number): UniversalDependencyTreeBankWord{
        const position = this.stack.length - 1 - index
        if (position < 0){
            return null
        }
        return this.stack[position].getWord()
    }

    /**
     * @return the word on top of the stack, or null when the stack is empty
     */
    public getPeek(): UniversalDependencyTreeBankWord{
        const depth = this.stack.length
        return depth > 0 ? this.stack[depth - 1].getWord() : null
    }

    /**
     * Returns the word at the given position of the word list.
     * @param index position in the word list (0 is the next word)
     * @return the word, or null when the index is past the end of the list
     */
    public getWordListWord(index: number): UniversalDependencyTreeBankWord{
        if (this.wordList.length - 1 < index){
            return null
        }
        return this.wordList[index].getWord()
    }

    /**
     * Returns the relation at the given position.
     * @param index position in the relation list
     * @return the relation, or null when the index is not below the list length
     */
    public getRelation(index: number): StackRelation{
        return index < this.relations.length ? this.relations[index] : null
    }

    /**
     * Copies the state. Stack entries, word-list entries and relations are cloned one by one;
     * a stack entry or relation whose word is named "root" is rebuilt around a fresh
     * default-constructed UniversalDependencyTreeBankWord instead of cloning the word.
     * @return the copied state
     */
    public clone(): State{
        const stackCopy = new Array<StackWord>()
        const wordListCopy = new Array<StackWord>()
        const relationsCopy = new Array<StackRelation>()
        for (const entry of this.stack){
            const isRoot = !(entry.getWord().getName() != "root")
            stackCopy.push(isRoot ? new StackWord(new UniversalDependencyTreeBankWord(), entry.getToWord()) : entry.clone())
        }
        for (const entry of this.wordList){
            wordListCopy.push(entry.clone())
        }
        for (const produced of this.relations){
            const isRoot = !(produced.getWord().getName() != "root")
            relationsCopy.push(isRoot ? new StackRelation(new UniversalDependencyTreeBankWord(), produced.getRelation()) : produced.clone())
        }
        return new State(stackCopy, wordListCopy, relationsCopy)
    }
}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import {
|
|
2
|
+
UniversalDependencyTreeBankSentence
|
|
3
|
+
} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankSentence";
|
|
4
|
+
import {
|
|
5
|
+
UniversalDependencyTreeBankWord
|
|
6
|
+
} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankWord";
|
|
7
|
+
import {
|
|
8
|
+
UniversalDependencyTreeBankCorpus
|
|
9
|
+
} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankCorpus";
|
|
10
|
+
import {DataSet} from "nlptoolkit-classification/dist/DataSet/DataSet";
|
|
11
|
+
import {Instance} from "nlptoolkit-classification/dist/Instance/Instance";
|
|
12
|
+
import {Oracle} from "./Oracle";
|
|
13
|
+
import {Agenda} from "./Agenda";
|
|
14
|
+
import {State} from "./State";
|
|
15
|
+
import {StackWord} from "./StackWord";
|
|
16
|
+
import {StackRelation} from "./StackRelation";
|
|
17
|
+
import {Candidate} from "./Candidate";
|
|
18
|
+
import {UniversalDependencyType} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyType";
|
|
19
|
+
import {UniversalDependencyRelation} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyRelation";
|
|
20
|
+
import {ScoringOracle} from "./ScoringOracle";
|
|
21
|
+
|
|
22
|
+
/**
 * Common base of the transition-based dependency parsers. Subclasses supply the training-time
 * simulation and the greedy parse; this class adds the corpus-level drivers and a beam search.
 */
export abstract class TransitionParser {

    /**
     * Replays the parse of one sentence and returns the classifier instances collected on the way.
     * @param sentence the sentence to be parsed
     * @param windowSize the size of the window used in parsing
     * @return a list of {@link Instance} objects representing the parsing process
     */
    abstract simulateParse(sentence: UniversalDependencyTreeBankSentence, windowSize: number): Array<Instance>

    /**
     * Parses one sentence under the guidance of the given oracle.
     * @param universalDependencyTreeBankSentence the sentence to be parsed
     * @param oracle the oracle used for guiding the parsing process
     * @return a {@link UniversalDependencyTreeBankSentence} with dependencies parsed
     */
    abstract dependencyParse(universalDependencyTreeBankSentence: UniversalDependencyTreeBankSentence, oracle: Oracle): UniversalDependencyTreeBankSentence

    protected constructor() {
    }

    /**
     * Builds a new sentence whose words copy id, name, lemma, upos, xpos, features, deps and misc
     * from the input words, with null passed in place of the relation. Entries that are not
     * UniversalDependencyTreeBankWord instances are skipped.
     * @param universalDependencyTreeBankSentence the sentence to be copied
     * @return a new {@link UniversalDependencyTreeBankSentence} with copied words but no dependencies
     */
    protected createResultSentence(universalDependencyTreeBankSentence: UniversalDependencyTreeBankSentence): UniversalDependencyTreeBankSentence{
        const stripped = new UniversalDependencyTreeBankSentence()
        for (let position = 0; position < universalDependencyTreeBankSentence.wordCount(); position++) {
            const source = universalDependencyTreeBankSentence.getWord(position)
            if (!(source instanceof UniversalDependencyTreeBankWord)){
                continue
            }
            stripped.addWord(new UniversalDependencyTreeBankWord(source.getId(), source.getName(), source.getLemma(), source.getUpos(), source.getXpos(), source.getFeatures(), null, source.getDeps(), source.getMisc()))
        }
        return stripped;
    }

    /**
     * Runs simulateParse on every sentence of the corpus and gathers all instances in one data set.
     * @param corpus the corpus to be parsed
     * @param windowSize the size of the window used in parsing
     * @return a {@link DataSet} containing instances from parsing each sentence in the corpus
     */
    public simulateParseOnCorpus(corpus: UniversalDependencyTreeBankCorpus, windowSize: number): DataSet{
        const collected = new DataSet()
        for (let position = 0; position < corpus.sentenceCount(); position++) {
            const candidate = corpus.getSentence(position)
            if (!(candidate instanceof UniversalDependencyTreeBankSentence)){
                continue
            }
            collected.addInstanceList(this.simulateParse(candidate, windowSize))
        }
        return collected;
    }

    /**
     * Tells whether any state of the agenda is unfinished, i.e. still has words to read or more
     * than one entry on its stack.
     * @param agenda the agenda containing the states
     * @return true if there are states to process, false otherwise
     */
    private checkStates(agenda: Agenda): boolean{
        for (const pending of agenda.getKeySet()){
            const finished = pending.wordListSize() <= 0 && pending.stackSize() <= 1
            if (!finished){
                return true
            }
        }
        return false
    }

    /**
     * Creates the start configuration: the stack holds a single argument-less {@link StackWord},
     * the word list holds every word of the sentence paired with its 1-based position, and there
     * are no relations yet.
     * @param sentence the sentence to initialize the state with
     * @return a {@link State} representing the starting point for parsing
     */
    protected initialState(sentence: UniversalDependencyTreeBankSentence): State{
        const buffer = new Array<StackWord>();
        for (let position = 0; position < sentence.wordCount(); position++) {
            const current = sentence.getWord(position)
            if (current instanceof UniversalDependencyTreeBankWord){
                buffer.push(new StackWord(current, position + 1))
            }
        }
        const startStack = new Array<StackWord>()
        startStack.push(new StackWord())
        return new State(startStack, buffer, new Array<StackRelation>())
    }

    /**
     * Lists the actions that may be tried from the given state. A finished state (one stack entry,
     * empty word list) yields no candidates. Otherwise SHIFT is offered while words remain, REDUCE
     * is offered under ARC_EAGER with a non-empty stack, and a LEFTARC/RIGHTARC pair is offered for
     * every entry of UniversalDependencyRelation.universalDependencyTypes when arcs are possible
     * (ARC_STANDARD: more than one stack entry; ARC_EAGER: non-empty stack and word list).
     * @param transitionSystem the transition system used (ARC_STANDARD or ARC_EAGER)
     * @param state the current parsing state
     * @return a list of possible {@link Candidate} actions to be applied
     */
    private constructCandidates(transitionSystem: TransitionSystem, state: State): Array<Candidate>{
        const options = new Array<Candidate>()
        const stackDepth = state.stackSize()
        const wordsLeft = state.wordListSize()
        if (stackDepth == 1 && wordsLeft == 0) {
            return options
        }
        if (wordsLeft > 0) {
            options.push(new Candidate(Command.SHIFT, UniversalDependencyType.DEP))
        }
        const isEager = transitionSystem == TransitionSystem.ARC_EAGER
        if (isEager && stackDepth > 0) {
            options.push(new Candidate(Command.REDUCE, UniversalDependencyType.DEP))
        }
        // The state is not modified while candidates are collected, so arc eligibility is fixed.
        const arcsPossible = (transitionSystem == TransitionSystem.ARC_STANDARD && stackDepth > 1)
            || (isEager && stackDepth > 0 && wordsLeft > 0)
        for (let t = 0; t < UniversalDependencyRelation.universalDependencyTypes.length; t++) {
            const dependencyType = UniversalDependencyRelation.getDependencyTag(UniversalDependencyRelation.universalDependencyTypes[t])
            if (arcsPossible) {
                options.push(new Candidate(Command.LEFTARC, dependencyType))
                options.push(new Candidate(Command.RIGHTARC, dependencyType))
            }
        }
        return options
    }

    /**
     * Beam search over parser states. Starting from the initial state of a relation-free copy of
     * the sentence, every candidate action of every agenda state is applied to a clone of that
     * state and the result is offered to the agenda, until no agenda state is unfinished.
     * @param oracle the scoring oracle used for guiding the search
     * @param beamSize the size of the beam for beam search
     * @param universalDependencyTreeBankSentence the sentence to be parsed
     * @param transitionSystem the transition system used (ARC_STANDARD or ARC_EAGER)
     * @return the best parsing state from the beam search
     */
    public dependencyParseWithBeamSearch(oracle: ScoringOracle,
                                         beamSize: number,
                                         universalDependencyTreeBankSentence: UniversalDependencyTreeBankSentence,
                                         transitionSystem: TransitionSystem): State{
        const start = this.initialState(this.createResultSentence(universalDependencyTreeBankSentence))
        const beam = new Agenda(beamSize)
        beam.updateAgenda(oracle, start.clone())
        while (this.checkStates(beam)) {
            for (const current of beam.getKeySet()) {
                for (const option of this.constructCandidates(transitionSystem, current)) {
                    const action = option.getCommand()
                    const dependencyType = option.getUniversalDependencyType()
                    const successor = current.clone()
                    successor.apply(action, dependencyType, transitionSystem)
                    beam.updateAgenda(oracle, successor.clone())
                }
            }
        }
        return beam.best()
    }

    /**
     * Parses every sentence of the corpus with dependencyParse and collects the results in a new
     * corpus. Entries that are not UniversalDependencyTreeBankSentence instances are skipped.
     * @param universalDependencyTreeBankCorpus the corpus to be parsed
     * @param oracle the oracle used for guiding the parsing process
     * @return a {@link UniversalDependencyTreeBankCorpus} containing the parsed sentences
     */
    public dependencyParseCorpus(universalDependencyTreeBankCorpus: UniversalDependencyTreeBankCorpus,
                                 oracle: Oracle){
        const parsed = new UniversalDependencyTreeBankCorpus()
        for (let position = 0; position < universalDependencyTreeBankCorpus.sentenceCount(); position++) {
            const candidate = universalDependencyTreeBankCorpus.getSentence(position)
            if (!(candidate instanceof UniversalDependencyTreeBankSentence)){
                continue
            }
            parsed.addSentence(this.dependencyParse(candidate, oracle))
        }
        return parsed
    }
}
|