nlptoolkit-morphologicalanalysis 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65):
  1. package/README.md +144 -0
  2. package/dist/Corpus/DisambiguatedWord.d.ts +20 -0
  3. package/dist/Corpus/DisambiguatedWord.js +38 -0
  4. package/dist/Corpus/DisambiguatedWord.js.map +1 -0
  5. package/dist/Corpus/DisambiguationCorpus.d.ts +4 -0
  6. package/dist/Corpus/DisambiguationCorpus.js +54 -0
  7. package/dist/Corpus/DisambiguationCorpus.js.map +1 -0
  8. package/dist/MorphologicalAnalysis/FiniteStateMachine.d.ts +63 -0
  9. package/dist/MorphologicalAnalysis/FiniteStateMachine.js +178 -0
  10. package/dist/MorphologicalAnalysis/FiniteStateMachine.js.map +1 -0
  11. package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.d.ts +399 -0
  12. package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js +1255 -0
  13. package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js.map +1 -0
  14. package/dist/MorphologicalAnalysis/FsmParse.d.ts +290 -0
  15. package/dist/MorphologicalAnalysis/FsmParse.js +684 -0
  16. package/dist/MorphologicalAnalysis/FsmParse.js.map +1 -0
  17. package/dist/MorphologicalAnalysis/FsmParseList.d.ts +96 -0
  18. package/dist/MorphologicalAnalysis/FsmParseList.js +242 -0
  19. package/dist/MorphologicalAnalysis/FsmParseList.js.map +1 -0
  20. package/dist/MorphologicalAnalysis/InflectionalGroup.d.ts +77 -0
  21. package/dist/MorphologicalAnalysis/InflectionalGroup.js +213 -0
  22. package/dist/MorphologicalAnalysis/InflectionalGroup.js.map +1 -0
  23. package/dist/MorphologicalAnalysis/MetamorphicParse.d.ts +63 -0
  24. package/dist/MorphologicalAnalysis/MetamorphicParse.js +592 -0
  25. package/dist/MorphologicalAnalysis/MetamorphicParse.js.map +1 -0
  26. package/dist/MorphologicalAnalysis/MorphologicalParse.d.ts +301 -0
  27. package/dist/MorphologicalAnalysis/MorphologicalParse.js +969 -0
  28. package/dist/MorphologicalAnalysis/MorphologicalParse.js.map +1 -0
  29. package/dist/MorphologicalAnalysis/MorphologicalTag.d.ts +510 -0
  30. package/dist/MorphologicalAnalysis/MorphologicalTag.js +525 -0
  31. package/dist/MorphologicalAnalysis/MorphologicalTag.js.map +1 -0
  32. package/dist/MorphologicalAnalysis/State.d.ts +40 -0
  33. package/dist/MorphologicalAnalysis/State.js +64 -0
  34. package/dist/MorphologicalAnalysis/State.js.map +1 -0
  35. package/dist/MorphologicalAnalysis/Transition.d.ts +159 -0
  36. package/dist/MorphologicalAnalysis/Transition.js +751 -0
  37. package/dist/MorphologicalAnalysis/Transition.js.map +1 -0
  38. package/index.js +12 -0
  39. package/package.json +30 -0
  40. package/penntreebank.txt +208431 -0
  41. package/source/Corpus/DisambiguatedWord.ts +29 -0
  42. package/source/Corpus/DisambiguationCorpus.ts +39 -0
  43. package/source/MorphologicalAnalysis/FiniteStateMachine.ts +165 -0
  44. package/source/MorphologicalAnalysis/FsmMorphologicalAnalyzer.ts +1256 -0
  45. package/source/MorphologicalAnalysis/FsmParse.ts +664 -0
  46. package/source/MorphologicalAnalysis/FsmParseList.ts +238 -0
  47. package/source/MorphologicalAnalysis/InflectionalGroup.ts +210 -0
  48. package/source/MorphologicalAnalysis/MetamorphicParse.ts +589 -0
  49. package/source/MorphologicalAnalysis/MorphologicalParse.ts +995 -0
  50. package/source/MorphologicalAnalysis/MorphologicalTag.ts +510 -0
  51. package/source/MorphologicalAnalysis/State.ts +59 -0
  52. package/source/MorphologicalAnalysis/Transition.ts +733 -0
  53. package/source/tsconfig.json +13 -0
  54. package/tests/DisambiguationCorpusTest.ts +12 -0
  55. package/tests/FiniteStateMachineTest.ts +87 -0
  56. package/tests/FsmMorphologicalAnalyzerTest.ts +204 -0
  57. package/tests/FsmParseListTest.ts +90 -0
  58. package/tests/FsmParseTest.ts +66 -0
  59. package/tests/InflectionalGroupTest.ts +84 -0
  60. package/tests/MorphologicalParseTest.ts +152 -0
  61. package/tests/TransitionTest.ts +174 -0
  62. package/tsconfig.json +15 -0
  63. package/turkish_dictionary.txt +62120 -0
  64. package/turkish_finite_state_machine.xml +1887 -0
  65. package/turkish_misspellings.txt +148932 -0
@@ -0,0 +1,29 @@
1
+ import {Word} from "nlptoolkit-dictionary/dist/Dictionary/Word";
2
+ import {MorphologicalParse} from "../MorphologicalAnalysis/MorphologicalParse";
3
+
4
/**
 * A {@link Word} that additionally carries a {@link MorphologicalParse}.
 */
export class DisambiguatedWord extends Word {

    /**
     * Creates the word from its name and keeps a reference to the supplied parse.
     * The parse object is stored as given; no copy is made.
     *
     * @param name  Text handed to the {@link Word} constructor.
     * @param parse {@link MorphologicalParse} to associate with this word.
     */
    constructor(name: string, private parse: MorphologicalParse) {
        super(name);
    }

    /**
     * Returns the parse that was supplied at construction time.
     *
     * @return The stored {@link MorphologicalParse}.
     */
    getParse(): MorphologicalParse {
        return this.parse;
    }
}
@@ -0,0 +1,39 @@
1
+ import {Corpus} from "nlptoolkit-corpus/dist/Corpus";
2
+ import * as fs from "fs";
3
+ import {DisambiguatedWord} from "./DisambiguatedWord";
4
+ import {MorphologicalParse} from "../MorphologicalAnalysis/MorphologicalParse";
5
+ import {Sentence} from "nlptoolkit-corpus/dist/Sentence";
6
+
7
/**
 * A {@link Corpus} whose sentences are made of {@link DisambiguatedWord}s read from a text file.
 *
 * Each useful line of the file has the form `word<TAB>parse`. Lines without a tab, with an empty
 * word, or with an empty parse are ignored.
 */
export class DisambiguationCorpus extends Corpus {

    /**
     * Reads the given file synchronously (UTF-8) and fills the corpus.
     *
     * Line handling, keyed on the text before the first tab:
     * - `<S>` starts a new, empty {@link Sentence};
     * - `</S>` adds the current sentence to the corpus (skipped when no sentence was ever started);
     * - `<DOC>`, `</DOC>`, `<TITLE>`, `</TITLE>` are ignored;
     * - anything else becomes a {@link DisambiguatedWord} appended to the current sentence, if any.
     *
     * Both LF and CRLF line endings are accepted, so a trailing carriage return never ends up
     * inside the parse string.
     *
     * @param fileName Path of the corpus file. When omitted (or `undefined`) the corpus stays empty.
     */
    constructor(fileName?: string) {
        super();
        if (fileName == undefined) {
            return;
        }
        let newSentence: Sentence | undefined = undefined;
        const data = fs.readFileSync(fileName, 'utf8');
        // Split on LF or CRLF so Windows-style files do not leave "\r" at the end of each parse.
        for (const line of data.split(/\r?\n/)) {
            const tabIndex = line.indexOf("\t");
            if (tabIndex <= 0) {
                // No tab at all, or nothing before the tab: there is no word on this line.
                continue;
            }
            const word = line.substring(0, tabIndex);
            const parse = line.substring(tabIndex + 1);
            if (parse == "") {
                continue;
            }
            switch (word) {
                case "<S>":
                    newSentence = new Sentence();
                    break;
                case "</S>":
                    // Guard against a closing tag that was never opened.
                    if (newSentence != undefined) {
                        this.addSentence(newSentence);
                    }
                    break;
                case "<DOC>":
                case "</DOC>":
                case "<TITLE>":
                case "</TITLE>":
                    // Document-level markup carries no words.
                    break;
                default:
                    // Build the word only when there is a sentence to receive it.
                    if (newSentence != undefined) {
                        newSentence.addWord(new DisambiguatedWord(word, new MorphologicalParse(parse)));
                    }
            }
        }
    }

}
@@ -0,0 +1,165 @@
1
+ import {State} from "./State";
2
+ import {Transition} from "./Transition";
3
+ import {XmlDocument} from "nlptoolkit-xmlparser/dist/XmlDocument";
4
+
5
/**
 * A finite state machine held in memory as a list of {@link State}s and, per state, the list of
 * {@link Transition}s leaving it. The content is loaded from an XML file when the object is built.
 */
export class FiniteStateMachine {

    private states: Array<State> = [];
    private transitions: Map<State, Array<Transition>> = new Map<State, Array<Transition>>();

    /**
     * Loads the machine from an XML file in two passes over the children of the document's first
     * child.
     *
     * Pass 1 creates one {@link State} for every child that has attributes, using the attributes
     * "name", "start" and "end" (the latter two are compared with "yes"). For start states the
     * "originalpos" attribute is passed to the {@link State} constructor as well.
     *
     * Pass 2 visits the same children again. Every attributed child of a state is a transition
     * whose "name" attribute names the target state, and whose "transitionname" and "topos"
     * attributes provide defaults. Every child of a transition produces one call to
     * {@link addTransition} with that child's character data. If the child has attributes, its
     * "name" replaces the current transition name and its "topos" is used as the part of speech;
     * when that is empty the transition-level "topos" is used, and when that is empty too no part
     * of speech is passed. A name taken from a child stays in effect for the following
     * attribute-less children of the same transition.
     *
     * A target state name that matches no state is not reported: the lookup yields undefined and
     * the transition is stored with that value.
     *
     * @param fileName Path handed to {@link XmlDocument}; defaults to "turkish_finite_state_machine.xml".
     */
    constructor(fileName: string = "turkish_finite_state_machine.xml") {
        const document = new XmlDocument(fileName);
        document.parse();
        const root = document.getFirstChild();

        // Pass 1: register every state so that pass 2 can resolve targets by name.
        for (let node = root.getFirstChild(); node != undefined; node = node.getNextSibling()) {
            if (!node.hasAttributes()) {
                continue;
            }
            const name = node.getAttributeValue("name");
            const isStart = node.getAttributeValue("start") == "yes";
            const isEnd = node.getAttributeValue("end") == "yes";
            const created = isStart
                ? new State(name, true, isEnd, node.getAttributeValue("originalpos"))
                : new State(name, false, isEnd);
            this.states.push(created);
        }

        // Pass 2: wire up the transitions of each state.
        for (let node = root.getFirstChild(); node != undefined; node = node.getNextSibling()) {
            if (!node.hasAttributes()) {
                continue;
            }
            const fromState = this.getState(node.getAttributeValue("name"));
            for (let arc = node.getFirstChild(); arc != undefined; arc = arc.getNextSibling()) {
                if (!arc.hasAttributes()) {
                    continue;
                }
                const target = this.getState(arc.getAttributeValue("name"));
                // Deliberately mutable: a child with attributes overrides it for the rest of this arc.
                let currentName = arc.getAttributeValue("transitionname");
                const arcToPos = arc.getAttributeValue("topos");
                for (let item = arc.getFirstChild(); item != undefined; item = item.getNextSibling()) {
                    let itemToPos = "";
                    if (item.hasAttributes()) {
                        currentName = item.getAttributeValue("name");
                        itemToPos = item.getAttributeValue("topos");
                    }
                    // Child-level part of speech wins; otherwise fall back to the arc-level one.
                    const chosenToPos = itemToPos == "" ? arcToPos : itemToPos;
                    if (chosenToPos == "") {
                        this.addTransition(fromState, target, item.getPcData(), currentName);
                    } else {
                        this.addTransition(fromState, target, item.getPcData(), currentName, chosenToPos);
                    }
                }
            }
        }
    }

    /**
     * Tells whether any stored transition has a string form equal to the given text.
     *
     * @param transition Text compared against the toString() value of every stored transition.
     * @return true if at least one transition matches, false otherwise.
     */
    isValidTransition(transition: string): boolean {
        for (const outgoing of this.transitions.values()) {
            for (const candidate of outgoing) {
                const text = candidate.toString();
                if (text != undefined && text == transition) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Returns the internal list of states (the same array instance, not a copy).
     *
     * @return All states of the machine.
     */
    getStates(): Array<State> {
        return this.states;
    }

    /**
     * Looks a state up by name.
     *
     * @param name Name to compare with each state's getName() value.
     * @return The first state with that name, or undefined when there is none.
     */
    getState(name: string): State {
        return this.states.find(candidate => candidate.getName() == name);
    }

    /**
     * Creates a {@link Transition} and appends it to the transitions leaving the given state.
     *
     * @param fromState State the transition leaves.
     * @param toState   State the transition leads to.
     * @param _with     First argument of the {@link Transition} constructor.
     * @param withName  Second argument of the {@link Transition} constructor.
     * @param toPos     Optional fourth argument of the {@link Transition} constructor.
     */
    addTransition(fromState: State, toState: State, _with: string, withName: string, toPos?: string) {
        const created = new Transition(_with, withName, toState, toPos);
        let outgoing = this.transitions.get(fromState);
        if (outgoing == undefined) {
            // First transition for this state: start its list.
            outgoing = [];
        }
        outgoing.push(created);
        this.transitions.set(fromState, outgoing);
    }

    /**
     * Returns the transitions leaving the given state.
     *
     * @param state State whose outgoing transitions are requested.
     * @return The stored list for that state, or a fresh empty array when none was recorded.
     */
    getTransitions(state: State): Array<Transition> {
        return this.transitions.has(state) ? this.transitions.get(state) : [];
    }
}