nlptoolkit-universaldependencyparser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112):
  1. package/dist/Agenda.d.ts +24 -0
  2. package/dist/Agenda.js +72 -0
  3. package/dist/Agenda.js.map +1 -0
  4. package/dist/ArcEagerInstanceGenerator.d.ts +20 -0
  5. package/dist/ArcEagerInstanceGenerator.js +81 -0
  6. package/dist/ArcEagerInstanceGenerator.js.map +1 -0
  7. package/dist/ArcEagerOracle.d.ts +9 -0
  8. package/dist/ArcEagerOracle.js +39 -0
  9. package/dist/ArcEagerOracle.js.map +1 -0
  10. package/dist/ArcEagerTransitionParser.d.ts +9 -0
  11. package/dist/ArcEagerTransitionParser.js +113 -0
  12. package/dist/ArcEagerTransitionParser.js.map +1 -0
  13. package/dist/ArcStandardOracle.d.ts +9 -0
  14. package/dist/ArcStandardOracle.js +36 -0
  15. package/dist/ArcStandardOracle.js.map +1 -0
  16. package/dist/ArcStandardTransitionParser.d.ts +28 -0
  17. package/dist/ArcStandardTransitionParser.js +129 -0
  18. package/dist/ArcStandardTransitionParser.js.map +1 -0
  19. package/dist/Candidate.d.ts +8 -0
  20. package/dist/Candidate.js +27 -0
  21. package/dist/Candidate.js.map +1 -0
  22. package/dist/Command.d.ts +6 -0
  23. package/dist/Command.js +8 -0
  24. package/dist/Command.js.map +1 -0
  25. package/dist/Decision.d.ts +7 -0
  26. package/dist/Decision.js +25 -0
  27. package/dist/Decision.js.map +1 -0
  28. package/dist/InstanceGenerator.d.ts +33 -0
  29. package/dist/InstanceGenerator.js +77 -0
  30. package/dist/InstanceGenerator.js.map +1 -0
  31. package/dist/Oracle.d.ts +49 -0
  32. package/dist/Oracle.js +115 -0
  33. package/dist/Oracle.js.map +1 -0
  34. package/dist/RandomOracle.d.ts +14 -0
  35. package/dist/RandomOracle.js +45 -0
  36. package/dist/RandomOracle.js.map +1 -0
  37. package/dist/RandomScoringOracle.d.ts +5 -0
  38. package/dist/RandomScoringOracle.js +21 -0
  39. package/dist/RandomScoringOracle.js.map +1 -0
  40. package/dist/ScoringOracle.d.ts +4 -0
  41. package/dist/ScoringOracle.js +17 -0
  42. package/dist/ScoringOracle.js.map +1 -0
  43. package/dist/SimpleInstanceGenerator.d.ts +14 -0
  44. package/dist/SimpleInstanceGenerator.js +67 -0
  45. package/dist/SimpleInstanceGenerator.js.map +1 -0
  46. package/dist/StackRelation.d.ts +10 -0
  47. package/dist/StackRelation.js +30 -0
  48. package/dist/StackRelation.js.map +1 -0
  49. package/dist/StackWord.d.ts +11 -0
  50. package/dist/StackWord.js +42 -0
  51. package/dist/StackWord.js.map +1 -0
  52. package/dist/State.d.ts +25 -0
  53. package/dist/State.js +161 -0
  54. package/dist/State.js.map +1 -0
  55. package/dist/TransitionParser.d.ts +73 -0
  56. package/dist/TransitionParser.js +164 -0
  57. package/dist/TransitionParser.js.map +1 -0
  58. package/dist/TransitionSystem.d.ts +4 -0
  59. package/dist/TransitionSystem.js +6 -0
  60. package/dist/TransitionSystem.js.map +1 -0
  61. package/index.js +20 -0
  62. package/models/en_atis_eager_c45_2.txt +1903 -0
  63. package/models/en_atis_eager_c45_3.txt +1918 -0
  64. package/models/en_atis_standard_c45_2.txt +1948 -0
  65. package/models/en_atis_standard_c45_3.txt +2233 -0
  66. package/models/tr_atis_eager_c45_2.txt +2208 -0
  67. package/models/tr_atis_eager_c45_3.txt +2303 -0
  68. package/models/tr_atis_standard_c45_2.txt +2863 -0
  69. package/models/tr_atis_standard_c45_3.txt +3413 -0
  70. package/models/tr_boun_eager_c45_2.txt +1613 -0
  71. package/models/tr_boun_eager_c45_3.txt +1818 -0
  72. package/models/tr_boun_standard_c45_2.txt +898 -0
  73. package/models/tr_boun_standard_c45_3.txt +1138 -0
  74. package/models/tr_framenet_eager_c45_2.txt +453 -0
  75. package/models/tr_framenet_eager_c45_3.txt +668 -0
  76. package/models/tr_framenet_standard_c45_2.txt +363 -0
  77. package/models/tr_framenet_standard_c45_3.txt +238 -0
  78. package/models/tr_kenet_eager_c45_2.txt +1788 -0
  79. package/models/tr_kenet_eager_c45_3.txt +1588 -0
  80. package/models/tr_kenet_standard_c45_2.txt +1308 -0
  81. package/models/tr_kenet_standard_c45_3.txt +1378 -0
  82. package/models/tr_penn_eager_c45_2.txt +2788 -0
  83. package/models/tr_penn_eager_c45_3.txt +1718 -0
  84. package/models/tr_penn_standard_c45_2.txt +2203 -0
  85. package/models/tr_penn_standard_c45_3.txt +1493 -0
  86. package/models/tr_tourism_eager_c45_2.txt +3468 -0
  87. package/models/tr_tourism_eager_c45_3.txt +4213 -0
  88. package/models/tr_tourism_standard_c45_2.txt +3868 -0
  89. package/models/tr_tourism_standard_c45_3.txt +4728 -0
  90. package/package.json +30 -0
  91. package/source/Parser/TransitionBasedParser/Agenda.ts +66 -0
  92. package/source/Parser/TransitionBasedParser/ArcEagerInstanceGenerator.ts +73 -0
  93. package/source/Parser/TransitionBasedParser/ArcEagerOracle.ts +30 -0
  94. package/source/Parser/TransitionBasedParser/ArcEagerTransitionParser.ts +105 -0
  95. package/source/Parser/TransitionBasedParser/ArcStandardOracle.ts +28 -0
  96. package/source/Parser/TransitionBasedParser/ArcStandardTransitionParser.ts +123 -0
  97. package/source/Parser/TransitionBasedParser/Candidate.ts +20 -0
  98. package/source/Parser/TransitionBasedParser/Command.ts +3 -0
  99. package/source/Parser/TransitionBasedParser/Decision.ts +16 -0
  100. package/source/Parser/TransitionBasedParser/InstanceGenerator.ts +85 -0
  101. package/source/Parser/TransitionBasedParser/Oracle.ts +123 -0
  102. package/source/Parser/TransitionBasedParser/RandomOracle.ts +37 -0
  103. package/source/Parser/TransitionBasedParser/RandomScoringOracle.ts +10 -0
  104. package/source/Parser/TransitionBasedParser/ScoringOracle.ts +5 -0
  105. package/source/Parser/TransitionBasedParser/SimpleInstanceGenerator.ts +57 -0
  106. package/source/Parser/TransitionBasedParser/StackRelation.ts +27 -0
  107. package/source/Parser/TransitionBasedParser/StackWord.ts +38 -0
  108. package/source/Parser/TransitionBasedParser/State.ts +168 -0
  109. package/source/Parser/TransitionBasedParser/TransitionParser.ts +187 -0
  110. package/source/Parser/TransitionBasedParser/TransitionSystem.ts +3 -0
  111. package/source/tsconfig.json +13 -0
  112. package/tsconfig.json +15 -0
@@ -0,0 +1,123 @@
1
+ import {Model} from "nlptoolkit-classification/dist/Model/Model";
2
+ import {Decision} from "./Decision";
3
+ import {State} from "./State";
4
+ import {Candidate} from "./Candidate";
5
+ import {UniversalDependencyRelation} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyRelation";
6
+ import {UniversalDependencyType} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyType";
7
+
8
/**
 * Base class of the oracles that choose the next transition for a parser state.
 * It stores the classification model and window size for subclasses and offers helpers that
 * pick the most probable applicable action and turn its label into a {@link Candidate}.
 */
export abstract class Oracle {

    protected commandModel: Model
    protected windowSize: number

    /**
     * Constructs an Oracle with the given model and window size.
     * @param model the model used for making predictions
     * @param windowSize the size of the window used in parsing
     */
    protected constructor(model: Model, windowSize: number) {
        this.commandModel = model
        this.windowSize = windowSize
    }

    /**
     * Abstract method to be implemented by subclasses to make a parsing decision based on the current state.
     * @param state the current parsing state
     * @return a {@link Decision} object representing the action to be taken
     */
    abstract makeDecision(state: State): Decision

    /**
     * Abstract method to be implemented by subclasses to score potential decisions based on the current state
     * and transition system.
     * @param state the current parsing state
     * @param transitionSystem the transition system being used (e.g., ARC_STANDARD or ARC_EAGER)
     * @return a list of {@link Decision} objects, each with a score indicating its suitability
     */
    protected abstract scoreDecisions(state: State, transitionSystem: TransitionSystem): Array<Decision>

    /**
     * Finds the most probable action that is applicable in the ARC_EAGER transition system.
     * "SHIFT" and "RIGHTARC" need a non-empty word list; every other key needs more than one stack element,
     * and "REDUCE" is additionally rejected while the word on top of the stack has no relation.
     * Only probabilities strictly greater than 0 are considered.
     * @param probabilities a map of actions to their associated probabilities
     * @param state the current parsing state
     * @return the best action as a string, or an empty string if no valid action is found
     */
    protected findBestValidEagerClassInfo(probabilities: Map<string, number>, state: State): string {
        let bestValue = 0.0
        let best = ""
        // A Map must be walked with for...of: for...in only visits enumerable own properties,
        // of which a Map has none, so the entries would never be examined.
        for (const [key, value] of probabilities) {
            if (value > bestValue) {
                if (key == "SHIFT" || key == "RIGHTARC") {
                    if (state.wordListSize() > 0) {
                        best = key
                        bestValue = value
                    }
                } else {
                    if (state.stackSize() > 1) {
                        if (!(key == "REDUCE" && state.getPeek().getRelation() == null)) {
                            best = key
                            bestValue = value
                        }
                    }
                }
            }
        }
        return best
    }

    /**
     * Finds the most probable action that is applicable in the ARC_STANDARD transition system.
     * "SHIFT" needs a non-empty word list; every other key needs more than one stack element.
     * Only probabilities strictly greater than 0 are considered.
     * @param probabilities a map of actions to their associated probabilities
     * @param state the current parsing state
     * @return the best action as a string, or an empty string if no valid action is found
     */
    protected findBestValidStandardClassInfo(probabilities: Map<string, number>, state: State): string {
        let bestValue = 0.0
        let best = ""
        // See findBestValidEagerClassInfo: for...of is required to actually iterate the Map entries.
        for (const [key, value] of probabilities) {
            if (value > bestValue) {
                if (key == "SHIFT") {
                    if (state.wordListSize() > 0) {
                        best = key
                        bestValue = value
                    }
                } else {
                    if (state.stackSize() > 1) {
                        best = key
                        bestValue = value
                    }
                }
            }
        }
        return best
    }

    /**
     * Converts a string representation of the best action into a {@link Candidate} object.
     * A label of the form "COMMAND(relation)" is split into the command and the dependency tag looked up
     * for the relation; a label without parentheses gets the DEP type.
     * @param best the best action represented as a string, possibly with a dependency type in parentheses
     * @return a {@link Candidate} object representing the action, or null if the action is unknown
     */
    protected getDecisionCandidate(best: string): Candidate {
        const open = best.indexOf('(')
        const command = open >= 0 ? best.substring(0, open) : best
        const type = open >= 0
            ? UniversalDependencyRelation.getDependencyTag(best.substring(open + 1, best.indexOf(')')))
            : UniversalDependencyType.DEP
        switch (command) {
            case "SHIFT":
                return new Candidate(Command.SHIFT, type)
            case "REDUCE":
                return new Candidate(Command.REDUCE, type)
            case "LEFTARC":
                return new Candidate(Command.LEFTARC, type)
            case "RIGHTARC":
                return new Candidate(Command.RIGHTARC, type)
        }
        return null
    }
}
@@ -0,0 +1,37 @@
1
+ import {Oracle} from "./Oracle";
2
+ import {State} from "./State";
3
+ import {Decision} from "./Decision";
4
+ import {Model} from "nlptoolkit-classification/dist/Model/Model";
5
+ import {UniversalDependencyType} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyType";
6
+ import {UniversalDependencyRelation} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyRelation";
7
+
8
/**
 * Oracle that ignores the parser state and picks its action at random.
 */
export class RandomOracle extends Oracle {

    /**
     * Forwards the model and window size to the base {@link Oracle}.
     * @param model the model handed to the base class
     * @param windowSize the window size handed to the base class
     */
    constructor(model: Model, windowSize: number) {
        super(model, windowSize)
    }

    /**
     * Draws one of three commands (LEFTARC, RIGHTARC, SHIFT) and a relation index in [0, 58) at random.
     * The arc commands use the drawn entry of the universal dependency tag table; SHIFT always uses DEP.
     * Every decision is created with the value 0 as its third constructor argument.
     * @param state The current state of the parser (not consulted).
     * @return A Decision object representing the randomly chosen action.
     */
    public makeDecision(state: State): Decision {
        // Both random numbers are drawn up front, command first, relation second.
        const commandIndex = Math.floor(Math.random() * 3)
        // NOTE(review): 58 is presumably the number of entries in universalDependencyTags; confirm.
        const relationIndex = Math.floor(Math.random() * 58)
        if (commandIndex === 0) {
            return new Decision(Command.LEFTARC, UniversalDependencyRelation.universalDependencyTags[relationIndex], 0)
        }
        if (commandIndex === 1) {
            return new Decision(Command.RIGHTARC, UniversalDependencyRelation.universalDependencyTags[relationIndex], 0)
        }
        if (commandIndex === 2) {
            return new Decision(Command.SHIFT, UniversalDependencyType.DEP, 0)
        }
        return null
    }

    /**
     * Scoring is not supported by this oracle.
     * @param state the current parsing state (unused)
     * @param transitionSystem the transition system in use (unused)
     * @return always null
     */
    protected scoreDecisions(state: State, transitionSystem: TransitionSystem): Array<Decision> {
        return null;
    }

}
@@ -0,0 +1,10 @@
1
+ import {ScoringOracle} from "./ScoringOracle";
2
+ import {State} from "./State";
3
+
4
/**
 * Scoring oracle that assigns every state a random score.
 */
export class RandomScoringOracle extends ScoringOracle {

    /**
     * Scores a state without inspecting it.
     * @param state the state to score (not consulted)
     * @return the value produced by Math.random()
     */
    score(state: State): number {
        const randomScore = Math.random()
        return randomScore
    }

}
@@ -0,0 +1,5 @@
1
+ import {State} from "./State";
2
+
3
/**
 * Contract for oracles that rate a parser state with a single number.
 */
export abstract class ScoringOracle {

    /**
     * Computes the score of a parser state.
     * @param state the state to be scored
     * @return the score assigned to the state
     */
    public abstract score(state: State): number

}
@@ -0,0 +1,57 @@
1
+ import {InstanceGenerator} from "./InstanceGenerator";
2
+ import {State} from "./State";
3
+ import {Instance} from "nlptoolkit-classification/dist/Instance/Instance";
4
+ import {DiscreteIndexedAttribute} from "nlptoolkit-classification/dist/Attribute/DiscreteIndexedAttribute";
5
+ import {
6
+ UniversalDependencyTreeBankFeatures
7
+ } from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankFeatures";
8
+ import {Attribute} from "nlptoolkit-classification/dist/Attribute/Attribute";
9
+
10
/**
 * Instance generator that describes a state by the part-of-speech and feature attributes of the
 * top stack words followed by those of the first word-list words.
 */
export class SimpleInstanceGenerator extends InstanceGenerator {

    constructor() {
        super();
    }

    /**
     * Generates an instance based on the state, window size, and command.
     * For each of the windowSize topmost stack positions, and then for each of the first windowSize
     * word-list positions, one indexed attribute (out of 18 values) is added, followed by the feature
     * attributes of the word, or by empty attributes when there is no usable word at that position.
     * @param state The current state of the parser, which includes the stack and word list.
     * @param windowSize The size of the window used for feature extraction.
     * @param command The command to be associated with the generated instance.
     * @return The generated {@link Instance} object with attributes based on the state and command.
     */
    generate(state: State, windowSize: number, command: string): Instance {
        const instance = new Instance(command)
        const attributes: Array<Attribute> = []
        for (let offset = 0; offset < windowSize; offset++) {
            const stackWord = state.getStackWord(offset)
            // A missing stack position is labelled "null", the artificial root word "root"; both use index 0.
            let placeholder: string = null
            if (stackWord == null) {
                placeholder = "null"
            } else if (stackWord.getName() == "root") {
                placeholder = "root"
            }
            if (placeholder != null) {
                attributes.push(new DiscreteIndexedAttribute(placeholder, 0, 18))
                this.addEmptyAttributes(attributes)
                continue
            }
            const upos = stackWord.getUpos().toString()
            attributes.push(new DiscreteIndexedAttribute(upos, UniversalDependencyTreeBankFeatures.posIndex(upos) + 1, 18))
            this.addFeatureAttributes(stackWord, attributes)
        }
        for (let offset = 0; offset < windowSize; offset++) {
            const nextWord = state.getWordListWord(offset)
            if (nextWord == null) {
                // Missing word-list positions are labelled "root" (not "null") here.
                attributes.push(new DiscreteIndexedAttribute("root", 0, 18))
                this.addEmptyAttributes(attributes)
                continue
            }
            const upos = nextWord.getUpos().toString()
            attributes.push(new DiscreteIndexedAttribute(upos, UniversalDependencyTreeBankFeatures.posIndex(upos) + 1, 18))
            this.addFeatureAttributes(nextWord, attributes)
        }
        attributes.forEach(attribute => {
            instance.addAttribute(attribute)
        })
        return instance
    }

}
@@ -0,0 +1,27 @@
1
+ import {
2
+ UniversalDependencyTreeBankWord
3
+ } from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankWord";
4
+ import {UniversalDependencyRelation} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyRelation";
5
+
6
/**
 * Immutable pair of a word and the dependency relation that was assigned to it.
 */
export class StackRelation {

    /**
     * @param word the word the relation belongs to
     * @param relation the relation assigned to the word
     */
    constructor(private readonly word: UniversalDependencyTreeBankWord,
                private readonly relation: UniversalDependencyRelation) {
    }

    /**
     * Copies this pair; the word is cloned, the relation object is shared with the original.
     * @return the new {@link StackRelation}
     */
    clone(): StackRelation {
        const wordCopy = this.word.clone()
        return new StackRelation(wordCopy, this.relation)
    }

    /**
     * @return the word of this pair
     */
    getWord(): UniversalDependencyTreeBankWord {
        return this.word
    }

    /**
     * @return the relation of this pair
     */
    getRelation(): UniversalDependencyRelation {
        return this.relation
    }
}
@@ -0,0 +1,38 @@
1
+ import {
2
+ UniversalDependencyTreeBankWord
3
+ } from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankWord";
4
+
5
/**
 * A word kept on the parser stack or word list together with the index used when
 * other words are attached to it.
 */
export class StackWord {

    private word: UniversalDependencyTreeBankWord
    private toWord: number

    /**
     * Creates the artificial root entry when called without a word, otherwise wraps the given word.
     * @param word the word to wrap; when omitted a default-constructed word with index 0 is used
     * @param toWord the index associated with the word
     */
    constructor(word?: UniversalDependencyTreeBankWord, toWord?: number) {
        if (word == undefined) {
            this.constructor1()
        } else {
            this.constructor2(word, toWord)
        }
    }

    /**
     * Initialises this entry as the root: a default-constructed word with index 0.
     * The index is set explicitly so that getToWord() yields a number for the root instead of undefined
     * when arcs pointing to the root are created.
     */
    constructor1() {
        this.word = new UniversalDependencyTreeBankWord()
        this.toWord = 0
    }

    /**
     * Initialises this entry with the given word and index.
     * @param word the word to wrap
     * @param toWord the index associated with the word
     */
    constructor2(word: UniversalDependencyTreeBankWord, toWord: number) {
        this.word = word
        this.toWord = toWord
    }

    /**
     * Copies this entry; the wrapped word is cloned and the index is carried over.
     * @return the new {@link StackWord}
     */
    clone(): StackWord {
        return new StackWord(this.word.clone(), this.toWord)
    }

    /**
     * @return the wrapped word
     */
    getWord(): UniversalDependencyTreeBankWord {
        return this.word
    }

    /**
     * @return the index associated with the wrapped word
     */
    getToWord(): number {
        return this.toWord
    }
}
@@ -0,0 +1,168 @@
1
+ import {StackWord} from "./StackWord";
2
+ import {StackRelation} from "./StackRelation";
3
+ import {UniversalDependencyType} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyType";
4
+ import {UniversalDependencyRelation} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyRelation";
5
+ import {
6
+ UniversalDependencyTreeBankWord
7
+ } from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankWord";
8
+
9
/**
 * Parser configuration: the stack, the list of words still to be read and the relations built so far,
 * together with the transitions of the arc-standard and arc-eager systems.
 */
export class State {

    private stack: Array<StackWord>
    private wordList: Array<StackWord>
    private relations: Array<StackRelation>

    /**
     * @param stack the stack of the configuration (last element is the top)
     * @param wordList the words not yet moved to the stack (first element is the next word)
     * @param relations the relations created so far
     */
    constructor(stack: Array<StackWord>, wordList: Array<StackWord>, relations: Array<StackRelation>) {
        this.stack = stack
        this.wordList = wordList
        this.relations = relations
    }

    /**
     * Moves the first word of the word list onto the stack; does nothing when the word list is empty.
     */
    public applyShift() {
        if (this.wordList.length === 0) {
            return
        }
        this.stack.push(this.wordList.shift())
    }

    /**
     * Arc-standard left arc: attaches the second stack element to the top element, removes the
     * second element from the stack and records the relation. Needs at least two stack elements.
     * @param type the dependency type; the first "_" of its string form is replaced by ":"
     */
    public applyLeftArc(type: UniversalDependencyType) {
        const depth = this.stack.length
        if (depth <= 1) {
            return
        }
        const dependent = this.stack[depth - 2].getWord()
        const headIndex = this.stack[depth - 1].getToWord()
        const label = type.toString().replace("_", ":")
        dependent.setRelation(new UniversalDependencyRelation(headIndex, label))
        this.stack.splice(depth - 2, 1)
        // A separate relation object is stored in the relation list.
        this.relations.push(new StackRelation(dependent, new UniversalDependencyRelation(headIndex, label)))
    }

    /**
     * Arc-standard right arc: attaches the top stack element to the second element, pops the top
     * and records the relation. Needs at least two stack elements.
     * @param type the dependency type; the first "_" of its string form is replaced by ":"
     */
    public applyRightArc(type: UniversalDependencyType) {
        const depth = this.stack.length
        if (depth <= 1) {
            return
        }
        const dependent = this.stack[depth - 1].getWord()
        const headIndex = this.stack[depth - 2].getToWord()
        const label = type.toString().replace("_", ":")
        dependent.setRelation(new UniversalDependencyRelation(headIndex, label))
        this.stack.pop()
        this.relations.push(new StackRelation(dependent, new UniversalDependencyRelation(headIndex, label)))
    }

    /**
     * Arc-eager left arc: attaches the top stack element to the first word of the word list, pops the
     * stack and records the relation. Needs a non-empty stack and a non-empty word list.
     * @param type the dependency type; the first "_" of its string form is replaced by ":"
     */
    public applyArcEagerLeftArc(type: UniversalDependencyType) {
        if (this.stack.length === 0 || this.wordList.length === 0) {
            return
        }
        const dependent = this.stack[this.stack.length - 1].getWord()
        const headIndex = this.wordList[0].getToWord()
        const label = type.toString().replace("_", ":")
        dependent.setRelation(new UniversalDependencyRelation(headIndex, label))
        this.stack.pop()
        this.relations.push(new StackRelation(dependent, new UniversalDependencyRelation(headIndex, label)))
    }

    /**
     * Arc-eager right arc: attaches the first word of the word list to the top stack element, shifts
     * that word onto the stack and records the relation. Needs a non-empty stack and word list.
     * @param type the dependency type; the first "_" of its string form is replaced by ":"
     */
    public applyArcEagerRightArc(type: UniversalDependencyType) {
        if (this.stack.length === 0 || this.wordList.length === 0) {
            return
        }
        const dependent = this.wordList[0].getWord()
        const headIndex = this.stack[this.stack.length - 1].getToWord()
        const label = type.toString().replace("_", ":")
        dependent.setRelation(new UniversalDependencyRelation(headIndex, label))
        this.applyShift()
        this.relations.push(new StackRelation(dependent, new UniversalDependencyRelation(headIndex, label)))
    }

    /**
     * Pops the top of the stack; does nothing when the stack is empty.
     */
    public applyReduce() {
        if (this.stack.length === 0) {
            return
        }
        this.stack.pop()
    }

    /**
     * Dispatches a command to the matching transition of the given transition system.
     * ARC_STANDARD handles LEFTARC, RIGHTARC and SHIFT; ARC_EAGER additionally handles REDUCE.
     * Any other combination leaves the state untouched.
     * @param command the command to execute
     * @param type the dependency type used by the arc commands
     * @param transitionSystem the transition system whose semantics apply
     */
    public apply(command: Command, type: UniversalDependencyType, transitionSystem: TransitionSystem) {
        if (transitionSystem === TransitionSystem.ARC_STANDARD) {
            if (command === Command.LEFTARC) {
                this.applyLeftArc(type)
            } else if (command === Command.RIGHTARC) {
                this.applyRightArc(type)
            } else if (command === Command.SHIFT) {
                this.applyShift()
            }
        } else if (transitionSystem === TransitionSystem.ARC_EAGER) {
            if (command === Command.LEFTARC) {
                this.applyArcEagerLeftArc(type)
            } else if (command === Command.RIGHTARC) {
                this.applyArcEagerRightArc(type)
            } else if (command === Command.SHIFT) {
                this.applyShift()
            } else if (command === Command.REDUCE) {
                this.applyReduce()
            }
        }
    }

    /**
     * @return the number of relations created so far
     */
    public relationSize(): number {
        return this.relations.length
    }

    /**
     * @return the number of words left in the word list
     */
    public wordListSize(): number {
        return this.wordList.length
    }

    /**
     * @return the number of elements on the stack
     */
    public stackSize(): number {
        return this.stack.length
    }

    /**
     * Returns the word at a given depth of the stack.
     * @param index depth counted from the top (0 is the top element)
     * @return the word at that depth, or null when the stack is not that deep
     */
    public getStackWord(index: number): UniversalDependencyTreeBankWord {
        const position = this.stack.length - 1 - index
        return position < 0 ? null : this.stack[position].getWord()
    }

    /**
     * @return the word on top of the stack, or null when the stack is empty
     */
    public getPeek(): UniversalDependencyTreeBankWord {
        const depth = this.stack.length
        return depth > 0 ? this.stack[depth - 1].getWord() : null
    }

    /**
     * Returns a word of the word list.
     * @param index position counted from the front of the word list
     * @return the word at that position, or null when the index is past the last word
     */
    public getWordListWord(index: number): UniversalDependencyTreeBankWord {
        return index > this.wordList.length - 1 ? null : this.wordList[index].getWord()
    }

    /**
     * Returns one of the recorded relations.
     * @param index position in the relation list
     * @return the relation at that position, or null when the index is not below the list size
     */
    public getRelation(index: number): StackRelation {
        return index < this.relations.length ? this.relations[index] : null
    }

    /**
     * Copies the state. Stack entries, word-list entries and relations are cloned; entries whose word
     * is named "root" are rebuilt around a freshly constructed word instead of being cloned.
     * @return the copied state
     */
    public clone(): State {
        const copy = new State([], [], [])
        for (const entry of this.stack) {
            const isRoot = entry.getWord().getName() == "root"
            copy.stack.push(isRoot
                ? new StackWord(new UniversalDependencyTreeBankWord(), entry.getToWord())
                : entry.clone())
        }
        for (const entry of this.wordList) {
            copy.wordList.push(entry.clone())
        }
        for (const made of this.relations) {
            const isRoot = made.getWord().getName() == "root"
            copy.relations.push(isRoot
                ? new StackRelation(new UniversalDependencyTreeBankWord(), made.getRelation())
                : made.clone())
        }
        return copy
    }
}
@@ -0,0 +1,187 @@
1
+ import {
2
+ UniversalDependencyTreeBankSentence
3
+ } from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankSentence";
4
+ import {
5
+ UniversalDependencyTreeBankWord
6
+ } from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankWord";
7
+ import {
8
+ UniversalDependencyTreeBankCorpus
9
+ } from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyTreeBankCorpus";
10
+ import {DataSet} from "nlptoolkit-classification/dist/DataSet/DataSet";
11
+ import {Instance} from "nlptoolkit-classification/dist/Instance/Instance";
12
+ import {Oracle} from "./Oracle";
13
+ import {Agenda} from "./Agenda";
14
+ import {State} from "./State";
15
+ import {StackWord} from "./StackWord";
16
+ import {StackRelation} from "./StackRelation";
17
+ import {Candidate} from "./Candidate";
18
+ import {UniversalDependencyType} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyType";
19
+ import {UniversalDependencyRelation} from "nlptoolkit-dependencyparser/dist/Universal/UniversalDependencyRelation";
20
+ import {ScoringOracle} from "./ScoringOracle";
21
+
22
/**
 * Common base of the transition-based dependency parsers. Subclasses supply the oracle-driven parse
 * and the training simulation; this class provides corpus-level drivers, state initialisation and a
 * beam search over parser states.
 */
export abstract class TransitionParser {

    /**
     * Parses a single sentence and returns a list of instances that represent the parsing process.
     * @param sentence the sentence to be parsed
     * @param windowSize the size of the window used in parsing
     * @return a list of {@link Instance} objects representing the parsing process
     */
    abstract simulateParse(sentence: UniversalDependencyTreeBankSentence, windowSize: number): Array<Instance>

    /**
     * Parses a single sentence using a specified oracle and returns the parsed sentence with dependencies.
     * @param universalDependencyTreeBankSentence the sentence to be parsed
     * @param oracle the oracle used for guiding the parsing process
     * @return a {@link UniversalDependencyTreeBankSentence} with dependencies parsed
     */
    abstract dependencyParse(universalDependencyTreeBankSentence: UniversalDependencyTreeBankSentence, oracle: Oracle): UniversalDependencyTreeBankSentence

    protected constructor() {
    }

    /**
     * Builds a new sentence whose words copy the id, name, lemma, POS tags, features, deps and misc of the
     * input words while their relation is set to null. Words that are not
     * {@link UniversalDependencyTreeBankWord} instances are skipped.
     * @param universalDependencyTreeBankSentence the sentence to be cloned
     * @return a new {@link UniversalDependencyTreeBankSentence} with copied words but no dependencies
     */
    protected createResultSentence(universalDependencyTreeBankSentence: UniversalDependencyTreeBankSentence): UniversalDependencyTreeBankSentence {
        const result = new UniversalDependencyTreeBankSentence()
        for (let position = 0; position < universalDependencyTreeBankSentence.wordCount(); position++) {
            const source = universalDependencyTreeBankSentence.getWord(position)
            if (!(source instanceof UniversalDependencyTreeBankWord)) {
                continue
            }
            const stripped = new UniversalDependencyTreeBankWord(source.getId(), source.getName(), source.getLemma(),
                source.getUpos(), source.getXpos(), source.getFeatures(), null, source.getDeps(), source.getMisc())
            result.addWord(stripped)
        }
        return result;
    }

    /**
     * Simulates parsing a corpus of sentences, returning a dataset of instances created by parsing each sentence.
     * @param corpus the corpus to be parsed
     * @param windowSize the size of the window used in parsing
     * @return a {@link DataSet} containing instances from parsing each sentence in the corpus
     */
    public simulateParseOnCorpus(corpus: UniversalDependencyTreeBankCorpus, windowSize: number): DataSet {
        const collected = new DataSet()
        for (let position = 0; position < corpus.sentenceCount(); position++) {
            const candidate = corpus.getSentence(position)
            if (candidate instanceof UniversalDependencyTreeBankSentence) {
                const instances = this.simulateParse(candidate, windowSize)
                collected.addInstanceList(instances)
            }
        }
        return collected;
    }

    /**
     * Checks if there are any states in the agenda that still have words to process or have more than one
     * item in the stack.
     * @param agenda the agenda containing the states
     * @return true if there are states to process, false otherwise
     */
    private checkStates(agenda: Agenda): boolean {
        for (const pending of agenda.getKeySet()) {
            const finished = pending.wordListSize() <= 0 && pending.stackSize() <= 1
            if (!finished) {
                return true
            }
        }
        return false
    }

    /**
     * Initializes the parsing state with a stack containing one default-constructed {@link StackWord} and a
     * word list containing all words of the sentence, numbered from 1.
     * @param sentence the sentence to initialize the state with
     * @return a {@link State} representing the starting point for parsing
     */
    protected initialState(sentence: UniversalDependencyTreeBankSentence): State {
        const pendingWords: Array<StackWord> = []
        for (let position = 0; position < sentence.wordCount(); position++) {
            const current = sentence.getWord(position)
            if (current instanceof UniversalDependencyTreeBankWord) {
                pendingWords.push(new StackWord(current, position + 1))
            }
        }
        const initialStack: Array<StackWord> = [new StackWord()]
        return new State(initialStack, pendingWords, new Array<StackRelation>())
    }

    /**
     * Constructs possible parsing candidates based on the current state and transition system.
     * A finished state (one stack element, empty word list) yields no candidates. Otherwise SHIFT is
     * offered while words remain, REDUCE is offered in ARC_EAGER while the stack is non-empty, and a
     * LEFTARC/RIGHTARC pair is offered per dependency type when the transition system permits arcs.
     * @param transitionSystem the transition system used (ARC_STANDARD or ARC_EAGER)
     * @param state the current parsing state
     * @return a list of possible {@link Candidate} actions to be applied
     */
    private constructCandidates(transitionSystem: TransitionSystem, state: State): Array<Candidate> {
        const candidates: Array<Candidate> = []
        if (state.stackSize() == 1 && state.wordListSize() == 0) {
            return candidates
        }
        if (state.wordListSize() > 0) {
            candidates.push(new Candidate(Command.SHIFT, UniversalDependencyType.DEP))
        }
        if (transitionSystem == TransitionSystem.ARC_EAGER && state.stackSize() > 0) {
            candidates.push(new Candidate(Command.REDUCE, UniversalDependencyType.DEP))
        }
        for (const typeName of UniversalDependencyRelation.universalDependencyTypes) {
            const type = UniversalDependencyRelation.getDependencyTag(typeName)
            const standardArcs = transitionSystem == TransitionSystem.ARC_STANDARD && state.stackSize() > 1
            const eagerArcs = transitionSystem == TransitionSystem.ARC_EAGER && state.stackSize() > 0
                && state.wordListSize() > 0
            if (standardArcs || eagerArcs) {
                candidates.push(new Candidate(Command.LEFTARC, type))
                candidates.push(new Candidate(Command.RIGHTARC, type))
            }
        }
        return candidates
    }

    /**
     * Performs dependency parsing with beam search to find the best parse for a given sentence.
     * While the agenda holds an unfinished state, every candidate of every agenda state is applied to a
     * copy of that state and the result is offered to the agenda.
     * @param oracle the scoring oracle used for guiding the search
     * @param beamSize the size of the beam for beam search
     * @param universalDependencyTreeBankSentence the sentence to be parsed
     * @param transitionSystem the transition system used (ARC_STANDARD or ARC_EAGER)
     * @return the best parsing state from the beam search
     */
    public dependencyParseWithBeamSearch(oracle: ScoringOracle,
                                         beamSize: number,
                                         universalDependencyTreeBankSentence: UniversalDependencyTreeBankSentence,
                                         transitionSystem: TransitionSystem): State {
        const stripped = this.createResultSentence(universalDependencyTreeBankSentence)
        const start = this.initialState(stripped)
        const agenda = new Agenda(beamSize)
        agenda.updateAgenda(oracle, start.clone())
        while (this.checkStates(agenda)) {
            for (const current of agenda.getKeySet()) {
                for (const candidate of this.constructCandidates(transitionSystem, current)) {
                    const command = candidate.getCommand()
                    const type = candidate.getUniversalDependencyType()
                    const successor = current.clone()
                    successor.apply(command, type, transitionSystem)
                    // The agenda receives its own copy of the successor state.
                    agenda.updateAgenda(oracle, successor.clone())
                }
            }
        }
        return agenda.best()
    }

    /**
     * Parses a corpus of sentences using the given oracle and returns a new corpus with the parsed sentences.
     * Entries that are not {@link UniversalDependencyTreeBankSentence} instances are skipped.
     * @param universalDependencyTreeBankCorpus the corpus to be parsed
     * @param oracle the oracle used for guiding the parsing process
     * @return a {@link UniversalDependencyTreeBankCorpus} containing the parsed sentences
     */
    public dependencyParseCorpus(universalDependencyTreeBankCorpus: UniversalDependencyTreeBankCorpus,
                                 oracle: Oracle) {
        const parsed = new UniversalDependencyTreeBankCorpus()
        for (let position = 0; position < universalDependencyTreeBankCorpus.sentenceCount(); position++) {
            const candidate = universalDependencyTreeBankCorpus.getSentence(position)
            if (candidate instanceof UniversalDependencyTreeBankSentence) {
                parsed.addSentence(this.dependencyParse(candidate, oracle))
            }
        }
        return parsed
    }
}
@@ -0,0 +1,3 @@
1
/**
 * Transition systems supported by the parsers.
 */
enum TransitionSystem {
    /** Arc-standard system (value 0). */
    ARC_STANDARD = 0,
    /** Arc-eager system (value 1). */
    ARC_EAGER = 1
}
@@ -0,0 +1,13 @@
1
+ {
2
+ "compilerOptions": {
3
+ "outDir": "../dist",
4
+ "module": "umd",
5
+ "target": "es6",
6
+ "sourceMap": true,
7
+ "noImplicitAny": true,
8
+ "strictNullChecks": false,
9
+ "removeComments": false,
10
+ "moduleResolution": "node",
11
+ "declaration": true
12
+ }
13
+ }