nlptoolkit-morphologicalanalysis 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +144 -0
  2. package/dist/Corpus/DisambiguatedWord.d.ts +20 -0
  3. package/dist/Corpus/DisambiguatedWord.js +38 -0
  4. package/dist/Corpus/DisambiguatedWord.js.map +1 -0
  5. package/dist/Corpus/DisambiguationCorpus.d.ts +4 -0
  6. package/dist/Corpus/DisambiguationCorpus.js +54 -0
  7. package/dist/Corpus/DisambiguationCorpus.js.map +1 -0
  8. package/dist/MorphologicalAnalysis/FiniteStateMachine.d.ts +63 -0
  9. package/dist/MorphologicalAnalysis/FiniteStateMachine.js +178 -0
  10. package/dist/MorphologicalAnalysis/FiniteStateMachine.js.map +1 -0
  11. package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.d.ts +399 -0
  12. package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js +1255 -0
  13. package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js.map +1 -0
  14. package/dist/MorphologicalAnalysis/FsmParse.d.ts +290 -0
  15. package/dist/MorphologicalAnalysis/FsmParse.js +684 -0
  16. package/dist/MorphologicalAnalysis/FsmParse.js.map +1 -0
  17. package/dist/MorphologicalAnalysis/FsmParseList.d.ts +96 -0
  18. package/dist/MorphologicalAnalysis/FsmParseList.js +242 -0
  19. package/dist/MorphologicalAnalysis/FsmParseList.js.map +1 -0
  20. package/dist/MorphologicalAnalysis/InflectionalGroup.d.ts +77 -0
  21. package/dist/MorphologicalAnalysis/InflectionalGroup.js +213 -0
  22. package/dist/MorphologicalAnalysis/InflectionalGroup.js.map +1 -0
  23. package/dist/MorphologicalAnalysis/MetamorphicParse.d.ts +63 -0
  24. package/dist/MorphologicalAnalysis/MetamorphicParse.js +592 -0
  25. package/dist/MorphologicalAnalysis/MetamorphicParse.js.map +1 -0
  26. package/dist/MorphologicalAnalysis/MorphologicalParse.d.ts +301 -0
  27. package/dist/MorphologicalAnalysis/MorphologicalParse.js +969 -0
  28. package/dist/MorphologicalAnalysis/MorphologicalParse.js.map +1 -0
  29. package/dist/MorphologicalAnalysis/MorphologicalTag.d.ts +510 -0
  30. package/dist/MorphologicalAnalysis/MorphologicalTag.js +525 -0
  31. package/dist/MorphologicalAnalysis/MorphologicalTag.js.map +1 -0
  32. package/dist/MorphologicalAnalysis/State.d.ts +40 -0
  33. package/dist/MorphologicalAnalysis/State.js +64 -0
  34. package/dist/MorphologicalAnalysis/State.js.map +1 -0
  35. package/dist/MorphologicalAnalysis/Transition.d.ts +159 -0
  36. package/dist/MorphologicalAnalysis/Transition.js +751 -0
  37. package/dist/MorphologicalAnalysis/Transition.js.map +1 -0
  38. package/index.js +12 -0
  39. package/package.json +30 -0
  40. package/penntreebank.txt +208431 -0
  41. package/source/Corpus/DisambiguatedWord.ts +29 -0
  42. package/source/Corpus/DisambiguationCorpus.ts +39 -0
  43. package/source/MorphologicalAnalysis/FiniteStateMachine.ts +165 -0
  44. package/source/MorphologicalAnalysis/FsmMorphologicalAnalyzer.ts +1256 -0
  45. package/source/MorphologicalAnalysis/FsmParse.ts +664 -0
  46. package/source/MorphologicalAnalysis/FsmParseList.ts +238 -0
  47. package/source/MorphologicalAnalysis/InflectionalGroup.ts +210 -0
  48. package/source/MorphologicalAnalysis/MetamorphicParse.ts +589 -0
  49. package/source/MorphologicalAnalysis/MorphologicalParse.ts +995 -0
  50. package/source/MorphologicalAnalysis/MorphologicalTag.ts +510 -0
  51. package/source/MorphologicalAnalysis/State.ts +59 -0
  52. package/source/MorphologicalAnalysis/Transition.ts +733 -0
  53. package/source/tsconfig.json +13 -0
  54. package/tests/DisambiguationCorpusTest.ts +12 -0
  55. package/tests/FiniteStateMachineTest.ts +87 -0
  56. package/tests/FsmMorphologicalAnalyzerTest.ts +204 -0
  57. package/tests/FsmParseListTest.ts +90 -0
  58. package/tests/FsmParseTest.ts +66 -0
  59. package/tests/InflectionalGroupTest.ts +84 -0
  60. package/tests/MorphologicalParseTest.ts +152 -0
  61. package/tests/TransitionTest.ts +174 -0
  62. package/tsconfig.json +15 -0
  63. package/turkish_dictionary.txt +62120 -0
  64. package/turkish_finite_state_machine.xml +1887 -0
  65. package/turkish_misspellings.txt +148932 -0
package/README.md ADDED
@@ -0,0 +1,144 @@
1
+ Morphological Analysis
2
+ ============
3
+
4
+ ## Morphology
5
+
6
+ In linguistics, the term morphology refers to the study of the internal structure of words. Each word is assumed to consist of one or more morphemes, which can be defined as the smallest linguistic unit having a particular meaning or grammatical function. One can come across morphologically simplex words, i.e. roots, as well as morphologically complex ones, such as compounds or affixed forms.
7
+
8
+ Batı-lı-laş-tır-ıl-ama-yan-lar-dan-mış-ız
9
+ west-With-Make-Caus-Pass-Neg.Abil-Nom-Pl-Abl-Evid-A3Pl
10
+ ‘It appears that we are among the ones that cannot be westernized.’
11
+
12
+ The morphemes that constitute a word combine in a (more or less) strict order. Most morphologically complex words are in the “ROOT-SUFFIX1-SUFFIX2-...” structure. There are two types of affixes: (i) derivational affixes, which change the meaning and sometimes also the grammatical category of the base they are attached to, and (ii) inflectional affixes, which serve particular grammatical functions. In general, derivational suffixes precede inflectional ones. The order of derivational suffixes is reflected in the meaning of the derived form. For instance, consider the combination of the noun göz ‘eye’ with two derivational suffixes -lIK and -CI: even though the same three morphemes are used, the meaning of a word like gözcülük ‘scouting’ is clearly different from that of gözlükçü ‘optician’.
13
+
14
+ ## Dilbaz
15
+
16
+ Here we present a new morphological analyzer, which is (i) open: the latest version of the source code, the lexicon, and the morphotactic rule engine are all available here, (ii) extendible: one of the disadvantages of other morphological analyzers is that their lexicons are fixed or unmodifiable, which prevents adding new bare-forms to the morphological analyzer. In our morphological analyzer, the lexicon is in text form and is easily modifiable, (iii) fast: morphological analysis is one of the core components of any NLP process. It must be very fast to handle huge corpora. Compared to other morphological analyzers, our analyzer is capable of analyzing hundreds of thousands of words per second, which makes it one of the fastest Turkish morphological analyzers available.
17
+
18
+ The morphological analyzer consists of five main components, namely, a lexicon, a finite state transducer, a rule engine for suffixation, a trie data structure, and a least recently used (LRU) cache.
19
+
20
+ In this analyzer, we assume all idiosyncratic information to be encoded in the lexicon. While phonologically conditioned allomorphy will be dealt with by the transducer, other types of allomorphy, all exceptional forms to otherwise regular processes, as well as words formed through derivation (except for the few transparently compositional derivational suffixes) are considered to be included in the lexicon.
21
+
22
+ In our morphological analyzer, the finite state transducer is encoded in an XML file.
23
+
24
+ To overcome the irregularities and also to accelerate the search for the bareforms, we use a trie data structure in our morphological analyzer, and store all words in our lexicon in that data structure. For the regular words, we only store that word in our trie, whereas for irregular words we store both the original form and some prefix of that word.
25
+
26
+ For Developers
27
+ ============
28
+
29
+ You can also see [Python](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Py),
30
+ [Java](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis), [C++](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-CPP),
31
+ [Swift](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Swift), [Cython](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Cy),
32
+ or [C#](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-CS) repository.
33
+
34
+ Detailed Description
35
+ ============
36
+
37
+ + [Creating FsmMorphologicalAnalyzer](#creating-fsmmorphologicalanalyzer)
38
+ + [Word level morphological analysis](#word-level-morphological-analysis)
39
+ + [Sentence level morphological analysis](#sentence-level-morphological-analysis)
40
+
41
+ ## Creating FsmMorphologicalAnalyzer
42
+
43
+ FsmMorphologicalAnalyzer provides Turkish morphological analysis. This class can be created as follows:
44
+
45
+ let fsm = new FsmMorphologicalAnalyzer();
46
+
47
+ This creates a new `TxtDictionary` from [`turkish_dictionary.txt`](https://github.com/olcaytaner/Dictionary/tree/master/src/main/resources) with a fixed cache size of 100000, using [`turkish_finite_state_machine.xml`](https://github.com/olcaytaner/MorphologicalAnalysis/tree/master/src/main/resources).
48
+
49
+ Creating a morphological analyzer with a different cache size, dictionary or finite state machine is also possible.
50
+ * With different cache size,
51
+
52
+ let fsm = new FsmMorphologicalAnalyzer(50000);
53
+
54
+ * Using a different dictionary,
55
+
56
+ let fsm = new FsmMorphologicalAnalyzer("my_turkish_dictionary.txt");
57
+
58
+ * Specifying both finite state machine and dictionary,
59
+
60
+ let fsm = new FsmMorphologicalAnalyzer("fsm.xml", "my_turkish_dictionary.txt");
61
+
62
+ * Specifying the finite state machine and cache size together with a `TxtDictionary` object,
63
+
64
+ let dictionary = new TxtDictionary("my_turkish_dictionary.txt", WordComparator.TURKISH);
65
+ let fsm = new FsmMorphologicalAnalyzer("fsm.xml", dictionary, 50000);
66
+
67
+ * With a different finite state machine and a `TxtDictionary` object,
68
+
69
+ let dictionary = new TxtDictionary("my_turkish_dictionary.txt", WordComparator.TURKISH, "my_turkish_misspelled.txt");
70
+ let fsm = new FsmMorphologicalAnalyzer("fsm.xml", dictionary);
71
+
72
+ ## Word level morphological analysis
73
+
74
+ For morphological analysis, the `morphologicalAnalysis(String word)` method of `FsmMorphologicalAnalyzer` is used. This returns an `FsmParseList` object.
75
+
76
+
77
+ let fsm = new FsmMorphologicalAnalyzer();
78
+ let word = "yarına";
79
+ let fsmParseList = fsm.morphologicalAnalysis(word);
80
+ for (let i = 0; i < fsmParseList.size(); i++){
81
+ console.log(fsmParseList.getFsmParse(i).getTransitionList());
82
+ }
83
+
84
+ Output
85
+
86
+ yar+NOUN+A3SG+P2SG+DAT
87
+ yar+NOUN+A3SG+P3SG+DAT
88
+ yarı+NOUN+A3SG+P2SG+DAT
89
+ yarın+NOUN+A3SG+PNON+DAT
90
+
91
+ From `FsmParseList`, a single `FsmParse` can be obtained as follows:
92
+
93
+ let parse = fsmParseList.getFsmParse(0);
94
+ console.log(parse.getTransitionList());
95
+
96
+ Output
97
+
98
+ yar+NOUN+A3SG+P2SG+DAT
99
+
100
+ ## Sentence level morphological analysis
101
+ For sentence level morphological analysis, the `morphologicalAnalysis(Sentence sentence)` method of `FsmMorphologicalAnalyzer` is used. This returns an `FsmParseList[]` array, one `FsmParseList` per word.
102
+
103
+ let fsm = new FsmMorphologicalAnalyzer();
104
+ let sentence = new Sentence("Yarın doktora gidecekler");
105
+ let parseLists = fsm.morphologicalAnalysis(sentence);
106
+ for(let i = 0; i < parseLists.length; i++){
107
+ for(let j = 0; j < parseLists[i].size(); j++){
108
+ let parse = parseLists[i].getFsmParse(j);
109
+ console.log(parse.getTransitionList());
110
+ }
111
+ console.log("-----------------");
112
+ }
113
+
114
+ Output
115
+
116
+ -----------------
117
+ yar+NOUN+A3SG+P2SG+NOM
118
+ yar+NOUN+A3SG+PNON+GEN
119
+ yar+VERB+POS+IMP+A2PL
120
+ yarı+NOUN+A3SG+P2SG+NOM
121
+ yarın+NOUN+A3SG+PNON+NOM
122
+ -----------------
123
+ doktor+NOUN+A3SG+PNON+DAT
124
+ doktora+NOUN+A3SG+PNON+NOM
125
+ -----------------
126
+ git+VERB+POS+FUT+A3PL
127
+ git+VERB+POS^DB+NOUN+FUTPART+A3PL+PNON+NOM
128
+
129
+ # Cite
130
+
131
+ @inproceedings{yildiz-etal-2019-open,
132
+ title = "An Open, Extendible, and Fast {T}urkish Morphological Analyzer",
133
+ author = {Y{\i}ld{\i}z, Olcay Taner and
134
+ Avar, Beg{\"u}m and
135
+ Ercan, G{\"o}khan},
136
+ booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)",
137
+ month = sep,
138
+ year = "2019",
139
+ address = "Varna, Bulgaria",
140
+ publisher = "INCOMA Ltd.",
141
+ url = "https://www.aclweb.org/anthology/R19-1156",
142
+ doi = "10.26615/978-954-452-056-4_156",
143
+ pages = "1364--1372",
144
+ }
@@ -0,0 +1,20 @@
1
+ import { Word } from "nlptoolkit-dictionary/dist/Dictionary/Word";
2
+ import { MorphologicalParse } from "../MorphologicalAnalysis/MorphologicalParse";
3
+ export declare class DisambiguatedWord extends Word {
4
+ private parse;
5
+ /**
6
+ * The constructor of {@link DisambiguatedWord} class which takes a {@link String} and a {@link MorphologicalParse}
7
+ * as inputs. It stores the given MorphologicalParse as is (no copy is made) and initializes the underlying {@link Word} with the
8
+ * given {@link String}.
9
+ *
10
+ * @param name Instances that will be a DisambiguatedWord.
11
+ * @param parse {@link MorphologicalParse} of the {@link DisambiguatedWord}.
12
+ */
13
+ constructor(name: string, parse: MorphologicalParse);
14
+ /**
15
+ * Accessor for the {@link MorphologicalParse}.
16
+ *
17
+ * @return MorphologicalParse.
18
+ */
19
+ getParse(): MorphologicalParse;
20
+ }
@@ -0,0 +1,38 @@
1
+ (function (factory) {
2
+ if (typeof module === "object" && typeof module.exports === "object") {
3
+ var v = factory(require, exports);
4
+ if (v !== undefined) module.exports = v;
5
+ }
6
+ else if (typeof define === "function" && define.amd) {
7
+ define(["require", "exports", "nlptoolkit-dictionary/dist/Dictionary/Word"], factory);
8
+ }
9
+ })(function (require, exports) {
10
+ "use strict";
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.DisambiguatedWord = void 0;
13
+ const Word_1 = require("nlptoolkit-dictionary/dist/Dictionary/Word");
14
+ class DisambiguatedWord extends Word_1.Word {
15
+ /**
16
+ * The constructor of {@link DisambiguatedWord} class which takes a {@link String} and a {@link MorphologicalParse}
17
+ * as inputs. It stores the given MorphologicalParse as is (no copy is made) and initializes the underlying {@link Word} with the
18
+ * given {@link String}.
19
+ *
20
+ * @param name Instances that will be a DisambiguatedWord.
21
+ * @param parse {@link MorphologicalParse} of the {@link DisambiguatedWord}.
22
+ */
23
+ constructor(name, parse) {
24
+ super(name);
25
+ this.parse = parse;
26
+ }
27
+ /**
28
+ * Accessor for the {@link MorphologicalParse}.
29
+ *
30
+ * @return MorphologicalParse.
31
+ */
32
+ getParse() {
33
+ return this.parse;
34
+ }
35
+ }
36
+ exports.DisambiguatedWord = DisambiguatedWord;
37
+ });
38
+ //# sourceMappingURL=DisambiguatedWord.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DisambiguatedWord.js","sourceRoot":"","sources":["../../source/Corpus/DisambiguatedWord.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,qEAAgE;IAGhE,MAAa,iBAAkB,SAAQ,WAAI;QAIvC;;;;;;;WAOG;QACH,YAAY,IAAY,EAAE,KAAyB;YAC/C,KAAK,CAAC,IAAI,CAAC,CAAC;YACZ,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;QACtB,CAAC;QAED;;;;WAIG;QACH,QAAQ;YACJ,OAAO,IAAI,CAAC,KAAK,CAAA;QACrB,CAAC;KACJ;IAzBD,8CAyBC"}
@@ -0,0 +1,4 @@
1
+ import { Corpus } from "nlptoolkit-corpus/dist/Corpus";
2
+ export declare class DisambiguationCorpus extends Corpus {
3
+ constructor(fileName: string);
4
+ }
@@ -0,0 +1,54 @@
1
+ (function (factory) {
2
+ if (typeof module === "object" && typeof module.exports === "object") {
3
+ var v = factory(require, exports);
4
+ if (v !== undefined) module.exports = v;
5
+ }
6
+ else if (typeof define === "function" && define.amd) {
7
+ define(["require", "exports", "nlptoolkit-corpus/dist/Corpus", "fs", "./DisambiguatedWord", "../MorphologicalAnalysis/MorphologicalParse", "nlptoolkit-corpus/dist/Sentence"], factory);
8
+ }
9
+ })(function (require, exports) {
10
+ "use strict";
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.DisambiguationCorpus = void 0;
13
+ const Corpus_1 = require("nlptoolkit-corpus/dist/Corpus");
14
+ const fs = require("fs");
15
+ const DisambiguatedWord_1 = require("./DisambiguatedWord");
16
+ const MorphologicalParse_1 = require("../MorphologicalAnalysis/MorphologicalParse");
17
+ const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
18
+ class DisambiguationCorpus extends Corpus_1.Corpus {
19
+ constructor(fileName) {
20
+ super();
21
+ if (fileName != undefined) {
22
+ let newSentence = undefined;
23
+ let data = fs.readFileSync(fileName, 'utf8');
24
+ let lines = data.split("\n");
25
+ for (let line of lines) {
26
+ let word = line.substring(0, line.indexOf("\t"));
27
+ let parse = line.substring(line.indexOf("\t") + 1);
28
+ if (word != "" && parse != "") {
29
+ let newWord = new DisambiguatedWord_1.DisambiguatedWord(word, new MorphologicalParse_1.MorphologicalParse(parse));
30
+ if (word == "<S>") {
31
+ newSentence = new Sentence_1.Sentence();
32
+ }
33
+ else {
34
+ if (word == "</S>") {
35
+ this.addSentence(newSentence);
36
+ }
37
+ else {
38
+ if (word == "<DOC>" || word == "</DOC>" || word == "<TITLE>" || word == "</TITLE>") {
39
+ }
40
+ else {
41
+ if (newSentence != null) {
42
+ newSentence.addWord(newWord);
43
+ }
44
+ }
45
+ }
46
+ }
47
+ }
48
+ }
49
+ }
50
+ }
51
+ }
52
+ exports.DisambiguationCorpus = DisambiguationCorpus;
53
+ });
54
+ //# sourceMappingURL=DisambiguationCorpus.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"DisambiguationCorpus.js","sourceRoot":"","sources":["../../source/Corpus/DisambiguationCorpus.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,0DAAqD;IACrD,yBAAyB;IACzB,2DAAsD;IACtD,oFAA+E;IAC/E,8DAAyD;IAEzD,MAAa,oBAAqB,SAAQ,eAAM;QAE5C,YAAY,QAAgB;YACxB,KAAK,EAAE,CAAC;YACR,IAAI,QAAQ,IAAI,SAAS,EAAC;gBACtB,IAAI,WAAW,GAAG,SAAS,CAAC;gBAC5B,IAAI,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;gBAC5C,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;gBAC5B,KAAK,IAAI,IAAI,IAAI,KAAK,EAAE;oBACpB,IAAI,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;oBACjD,IAAI,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBACnD,IAAI,IAAI,IAAI,EAAE,IAAI,KAAK,IAAI,EAAE,EAAE;wBAC3B,IAAI,OAAO,GAAG,IAAI,qCAAiB,CAAC,IAAI,EAAE,IAAI,uCAAkB,CAAC,KAAK,CAAC,CAAC,CAAC;wBACzE,IAAI,IAAI,IAAI,KAAK,EAAE;4BACf,WAAW,GAAG,IAAI,mBAAQ,EAAE,CAAC;yBAChC;6BAAM;4BACH,IAAI,IAAI,IAAI,MAAM,EAAE;gCAChB,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;6BACjC;iCAAM;gCACH,IAAI,IAAI,IAAI,OAAO,IAAI,IAAI,IAAI,QAAQ,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI,IAAI,UAAU,EAAE;iCACnF;qCAAM;oCACH,IAAI,WAAW,IAAI,IAAI,EAAE;wCACrB,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;qCAChC;iCACJ;6BACJ;yBACJ;qBACJ;iBACJ;aACJ;QACL,CAAC;KAEJ;IAhCD,oDAgCC"}
@@ -0,0 +1,63 @@
1
+ import { State } from "./State";
2
+ import { Transition } from "./Transition";
3
+ export declare class FiniteStateMachine {
4
+ private states;
5
+ private transitions;
6
+ /**
7
+ * Constructor reads the finite state machine in the given input file. It has a NodeList which holds the states
8
+ * of the nodes and there are 4 different type of nodes; stateNode, root Node, transitionNode and withNode.
9
+ * Also there are two states; state that a node currently in and state that a node will be in.
10
+ * <p>
11
+ * XmlDocument (from nlptoolkit-xmlparser) is used to parse the given file. Firstly it gets the first child of the document
12
+ * and walks through that node's children. Every child that has attributes is treated as a state and is put into the state list (the states array).
13
+ * Secondly, it traverses this stateList and gets each Node's attributes. There are three attributes; name, start,
14
+ * and end which will be named as states. If a node is in a startState it is tagged as 'yes', otherwise 'no'.
15
+ * Also, if a node is in a startState, additional attribute will be fetched; originalPos that represents its original
16
+ * part of speech.
17
+ * <p>
18
+ * At the last step, by starting rootNode's first child, it gets all the transitionNodes and next states called toState,
19
+ * then continue with the nextSiblings. Note that a toState name that matches no state is not reported; the transition is added with an undefined toState.
20
+ *
21
+ * @param fileName the resource file to read the finite state machine. Only files in resources folder are supported.
22
+ */
23
+ constructor(fileName?: string);
24
+ /**
25
+ * The isValidTransition loops through the transitions stored for every state. If the string form of any of these transitions
26
+ * equals to the given transition input, method returns true otherwise returns false.
27
+ *
28
+ * @param transition is used to compare with the actual transition of a state.
29
+ * @return true when the actual transition equals to the transition input, false otherwise.
30
+ */
31
+ isValidTransition(transition: string): boolean;
32
+ /**
33
+ * the getStates method returns the states in the FiniteStateMachine.
34
+ * @return StateList.
35
+ */
36
+ getStates(): Array<State>;
37
+ /**
38
+ * The getState method is used to loop through the states {@link Array} and return the state whose name equal
39
+ * to the given input name.
40
+ *
41
+ * @param name is used to compare with the state's actual name.
42
+ * @return state if found any, null otherwise.
43
+ */
44
+ getState(name: string): State;
45
+ /**
46
+ * The addTransition method takes an optional additional argument, toPos. It creates a new {@link Transition}
47
+ * with given input parameters and adds the transition to transitions {@link Array}.
48
+ *
49
+ * @param fromState State type input indicating the from state.
50
+ * @param toState State type input indicating the next state.
51
+ * @param _with String input indicating with what the transition will be made.
52
+ * @param withName String input.
53
+ * @param toPos String input.
54
+ */
55
+ addTransition(fromState: State, toState: State, _with: string, withName: string, toPos?: string): void;
56
+ /**
57
+ * The getTransitions method returns the transitions at the given state.
58
+ *
59
+ * @param state State input.
60
+ * @return transitions at given state.
61
+ */
62
+ getTransitions(state: State): Array<Transition>;
63
+ }
@@ -0,0 +1,178 @@
1
+ (function (factory) {
2
+ if (typeof module === "object" && typeof module.exports === "object") {
3
+ var v = factory(require, exports);
4
+ if (v !== undefined) module.exports = v;
5
+ }
6
+ else if (typeof define === "function" && define.amd) {
7
+ define(["require", "exports", "./State", "./Transition", "nlptoolkit-xmlparser/dist/XmlDocument"], factory);
8
+ }
9
+ })(function (require, exports) {
10
+ "use strict";
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.FiniteStateMachine = void 0;
13
+ const State_1 = require("./State");
14
+ const Transition_1 = require("./Transition");
15
+ const XmlDocument_1 = require("nlptoolkit-xmlparser/dist/XmlDocument");
16
+ class FiniteStateMachine {
17
+ /**
18
+ * Constructor reads the finite state machine in the given input file. It has a NodeList which holds the states
19
+ * of the nodes and there are 4 different type of nodes; stateNode, root Node, transitionNode and withNode.
20
+ * Also there are two states; state that a node currently in and state that a node will be in.
21
+ * <p>
22
+ * XmlDocument (from nlptoolkit-xmlparser) is used to parse the given file. Firstly it gets the first child of the document
23
+ * and walks through that node's children. Every child that has attributes is treated as a state and is put into the state list (the states array).
24
+ * Secondly, it traverses this stateList and gets each Node's attributes. There are three attributes; name, start,
25
+ * and end which will be named as states. If a node is in a startState it is tagged as 'yes', otherwise 'no'.
26
+ * Also, if a node is in a startState, additional attribute will be fetched; originalPos that represents its original
27
+ * part of speech.
28
+ * <p>
29
+ * At the last step, by starting rootNode's first child, it gets all the transitionNodes and next states called toState,
30
+ * then continue with the nextSiblings. Note that a toState name that matches no state is not reported; the transition is added with an undefined toState.
31
+ *
32
+ * @param fileName the resource file to read the finite state machine. Only files in resources folder are supported.
33
+ */
34
+ constructor(fileName = "turkish_finite_state_machine.xml") {
35
+ this.states = new Array();
36
+ this.transitions = new Map();
37
+ let xmlDocument = new XmlDocument_1.XmlDocument(fileName);
38
+ xmlDocument.parse();
39
+ let stateListNode = xmlDocument.getFirstChild();
40
+ let stateNode = stateListNode.getFirstChild();
41
+ while (stateNode != undefined) {
42
+ if (stateNode.hasAttributes()) {
43
+ let stateName = stateNode.getAttributeValue("name");
44
+ let startState = stateNode.getAttributeValue("start");
45
+ let endState = stateNode.getAttributeValue("end");
46
+ let state;
47
+ if (startState == "yes") {
48
+ let originalPos = stateNode.getAttributeValue("originalpos");
49
+ state = new State_1.State(stateName, true, endState == "yes", originalPos);
50
+ }
51
+ else {
52
+ state = new State_1.State(stateName, false, endState == "yes");
53
+ }
54
+ this.states.push(state);
55
+ }
56
+ stateNode = stateNode.getNextSibling();
57
+ }
58
+ stateNode = stateListNode.getFirstChild();
59
+ while (stateNode != undefined) {
60
+ if (stateNode.hasAttributes()) {
61
+ let stateName = stateNode.getAttributeValue("name");
62
+ let state = this.getState(stateName);
63
+ let transitionNode = stateNode.getFirstChild();
64
+ while (transitionNode != undefined) {
65
+ if (transitionNode.hasAttributes()) {
66
+ let toStateName = transitionNode.getAttributeValue("name");
67
+ let toState = this.getState(toStateName);
68
+ let withName = transitionNode.getAttributeValue("transitionname");
69
+ let rootToPos = transitionNode.getAttributeValue("topos");
70
+ let withNode = transitionNode.getFirstChild();
71
+ while (withNode != undefined) {
72
+ let toPos;
73
+ if (withNode.hasAttributes()) {
74
+ withName = withNode.getAttributeValue("name");
75
+ toPos = withNode.getAttributeValue("topos");
76
+ }
77
+ else {
78
+ toPos = "";
79
+ }
80
+ if (toPos == "") {
81
+ if (rootToPos == "") {
82
+ this.addTransition(state, toState, withNode.getPcData(), withName);
83
+ }
84
+ else {
85
+ this.addTransition(state, toState, withNode.getPcData(), withName, rootToPos);
86
+ }
87
+ }
88
+ else {
89
+ this.addTransition(state, toState, withNode.getPcData(), withName, toPos);
90
+ }
91
+ withNode = withNode.getNextSibling();
92
+ }
93
+ }
94
+ transitionNode = transitionNode.getNextSibling();
95
+ }
96
+ }
97
+ stateNode = stateNode.getNextSibling();
98
+ }
99
+ }
100
+ /**
101
+ * The isValidTransition loops through the transitions stored for every state. If the string form of any of these transitions
102
+ * equals to the given transition input, method returns true otherwise returns false.
103
+ *
104
+ * @param transition is used to compare with the actual transition of a state.
105
+ * @return true when the actual transition equals to the transition input, false otherwise.
106
+ */
107
+ isValidTransition(transition) {
108
+ for (let state of this.transitions.keys()) {
109
+ for (let transition1 of this.transitions.get(state)) {
110
+ if (transition1.toString() != undefined && transition1.toString() == transition) {
111
+ return true;
112
+ }
113
+ }
114
+ }
115
+ return false;
116
+ }
117
+ /**
118
+ * the getStates method returns the states in the FiniteStateMachine.
119
+ * @return StateList.
120
+ */
121
+ getStates() {
122
+ return this.states;
123
+ }
124
+ /**
125
+ * The getState method is used to loop through the states {@link Array} and return the state whose name equal
126
+ * to the given input name.
127
+ *
128
+ * @param name is used to compare with the state's actual name.
129
+ * @return state if found any, null otherwise.
130
+ */
131
+ getState(name) {
132
+ for (let state of this.states) {
133
+ if (state.getName() == name) {
134
+ return state;
135
+ }
136
+ }
137
+ return undefined;
138
+ }
139
+ /**
140
+ * The addTransition method takes an optional additional argument, toPos. It creates a new {@link Transition}
141
+ * with given input parameters and adds the transition to transitions {@link Array}.
142
+ *
143
+ * @param fromState State type input indicating the from state.
144
+ * @param toState State type input indicating the next state.
145
+ * @param _with String input indicating with what the transition will be made.
146
+ * @param withName String input.
147
+ * @param toPos String input.
148
+ */
149
+ addTransition(fromState, toState, _with, withName, toPos) {
150
+ let newTransition = new Transition_1.Transition(_with, withName, toState, toPos);
151
+ let transitionList;
152
+ if (this.transitions.has(fromState)) {
153
+ transitionList = this.transitions.get(fromState);
154
+ }
155
+ else {
156
+ transitionList = new Array();
157
+ }
158
+ transitionList.push(newTransition);
159
+ this.transitions.set(fromState, transitionList);
160
+ }
161
+ /**
162
+ * The getTransitions method returns the transitions at the given state.
163
+ *
164
+ * @param state State input.
165
+ * @return transitions at given state.
166
+ */
167
+ getTransitions(state) {
168
+ if (this.transitions.has(state)) {
169
+ return this.transitions.get(state);
170
+ }
171
+ else {
172
+ return new Array();
173
+ }
174
+ }
175
+ }
176
+ exports.FiniteStateMachine = FiniteStateMachine;
177
+ });
178
+ //# sourceMappingURL=FiniteStateMachine.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"FiniteStateMachine.js","sourceRoot":"","sources":["../../source/MorphologicalAnalysis/FiniteStateMachine.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,mCAA8B;IAC9B,6CAAwC;IACxC,uEAAkE;IAElE,MAAa,kBAAkB;QAK3B;;;;;;;;;;;;;;;;WAgBG;QACH,YAAY,WAAmB,kCAAkC;YApBzD,WAAM,GAAiB,IAAI,KAAK,EAAS,CAAA;YACzC,gBAAW,GAAkC,IAAI,GAAG,EAA4B,CAAA;YAoBpF,IAAI,WAAW,GAAG,IAAI,yBAAW,CAAC,QAAQ,CAAC,CAAA;YAC3C,WAAW,CAAC,KAAK,EAAE,CAAC;YACpB,IAAI,aAAa,GAAG,WAAW,CAAC,aAAa,EAAE,CAAC;YAChD,IAAI,SAAS,GAAG,aAAa,CAAC,aAAa,EAAE,CAAC;YAC9C,OAAO,SAAS,IAAI,SAAS,EAAE;gBAC3B,IAAI,SAAS,CAAC,aAAa,EAAE,EAAE;oBAC3B,IAAI,SAAS,GAAG,SAAS,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;oBACpD,IAAI,UAAU,GAAG,SAAS,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;oBACtD,IAAI,QAAQ,GAAG,SAAS,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;oBAClD,IAAI,KAAK,CAAA;oBACT,IAAI,UAAU,IAAI,KAAK,EAAE;wBACrB,IAAI,WAAW,GAAG,SAAS,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;wBAC7D,KAAK,GAAG,IAAI,aAAK,CAAC,SAAS,EAAE,IAAI,EAAE,QAAQ,IAAI,KAAK,EAAE,WAAW,CAAC,CAAC;qBACtE;yBAAM;wBACH,KAAK,GAAG,IAAI,aAAK,CAAC,SAAS,EAAE,KAAK,EAAE,QAAQ,IAAI,KAAK,CAAC,CAAC;qBAC1D;oBACD,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;iBAC3B;gBACD,SAAS,GAAG,SAAS,CAAC,cAAc,EAAE,CAAC;aAC1C;YACD,SAAS,GAAG,aAAa,CAAC,aAAa,EAAE,CAAC;YAC1C,OAAO,SAAS,IAAI,SAAS,EAAC;gBAC1B,IAAI,SAAS,CAAC,aAAa,EAAE,EAAC;oBAC1B,IAAI,SAAS,GAAG,SAAS,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;oBACpD,IAAI,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;oBACrC,IAAI,cAAc,GAAG,SAAS,CAAC,aAAa,EAAE,CAAC;oBAC/C,OAAO,cAAc,IAAI,SAAS,EAAC;wBAC/B,IAAI,cAAc,CAAC,aAAa,EAAE,EAAC;4BAC/B,IAAI,WAAW,GAAG,cAAc,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;4BAC3D,IAAI,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;4BACzC,IAAI,QAAQ,GAAG,cAAc,CAAC,iBAAiB,CAAC,gBAAgB,CAAC,CAAC;4BAClE,IAAI,SAAS,GAAG,cAAc,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;4BAC1D,IAAI,QAAQ,GAAG,cAAc,CAAC,aAAa,EAAE,CAAC;4BAC9C,OAAO,QAAQ,IAAI,SAAS,EAAC;gCACzB,IAAI,KAAK,CAAA;gCACT,IAAI,QAAQ,CAAC,aAAa,EAAE,EAAC;oCACzB,QAAQ,GAAG,QAAQ,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;oCAC9C,KAAK,GAAG,QAAQ,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;iCAC/C;
qCAAM;oCACH,KAAK,GAAG,EAAE,CAAC;iCACd;gCACD,IAAI,KAAK,IAAI,EAAE,EAAC;oCACZ,IAAI,SAAS,IAAI,EAAE,EAAC;wCAChB,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,SAAS,EAAE,EAAE,QAAQ,CAAC,CAAC;qCACtE;yCAAM;wCACH,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;qCACjF;iCACJ;qCAAM;oCACH,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC;iCAC7E;gCACD,QAAQ,GAAG,QAAQ,CAAC,cAAc,EAAE,CAAC;6BACxC;yBACJ;wBACD,cAAc,GAAG,cAAc,CAAC,cAAc,EAAE,CAAC;qBACpD;iBACJ;gBACD,SAAS,GAAG,SAAS,CAAC,cAAc,EAAE,CAAC;aAC1C;QACL,CAAC;QAED;;;;;;WAMG;QACH,iBAAiB,CAAC,UAAkB;YAChC,KAAK,IAAI,KAAK,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,EAAE;gBACvC,KAAK,IAAI,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE;oBACjD,IAAI,WAAW,CAAC,QAAQ,EAAE,IAAI,SAAS,IAAI,WAAW,CAAC,QAAQ,EAAE,IAAI,UAAU,EAAE;wBAC7E,OAAO,IAAI,CAAC;qBACf;iBACJ;aACJ;YACD,OAAO,KAAK,CAAC;QACjB,CAAC;QAED;;;WAGG;QACH,SAAS;YACL,OAAO,IAAI,CAAC,MAAM,CAAA;QACtB,CAAC;QAED;;;;;;WAMG;QACH,QAAQ,CAAC,IAAY;YACjB,KAAK,IAAI,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE;gBAC3B,IAAI,KAAK,CAAC,OAAO,EAAE,IAAI,IAAI,EAAE;oBACzB,OAAO,KAAK,CAAC;iBAChB;aACJ;YACD,OAAO,SAAS,CAAC;QACrB,CAAC;QAED;;;;;;;;;WASG;QACH,aAAa,CAAC,SAAgB,EAAE,OAAc,EAAE,KAAa,EAAE,QAAgB,EAAE,KAAc;YAC3F,IAAI,aAAa,GAAG,IAAI,uBAAU,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;YACpE,IAAI,cAAc,CAAA;YAClB,IAAI,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,EAAC;gBAChC,cAAc,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;aACpD;iBAAM;gBACH,cAAc,GAAG,IAAI,KAAK,EAAc,CAAA;aAC3C;YACD,cAAc,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YACnC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;QACpD,CAAC;QAED;;;;;WAKG;QACH,cAAc,CAAC,KAAY;YACvB,IAAI,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,EAAC;gBAC5B,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;aACtC;iBAAM;gBACH,OAAO,IAAI,KAAK,EAAc,CAAC;aAClC;QACL,CAAC;KACJ;IAhKD,gDAgKC"}