nlptoolkit-morphologicalanalysis 1.0.18 → 1.0.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/Corpus/DisambiguatedWord.js +26 -35
  2. package/dist/Corpus/DisambiguatedWord.js.map +1 -1
  3. package/dist/Corpus/DisambiguationCorpus.js +70 -47
  4. package/dist/Corpus/DisambiguationCorpus.js.map +1 -1
  5. package/dist/MorphologicalAnalysis/FiniteStateMachine.js +148 -158
  6. package/dist/MorphologicalAnalysis/FiniteStateMachine.js.map +1 -1
  7. package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js +1281 -1254
  8. package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js.map +1 -1
  9. package/dist/MorphologicalAnalysis/FsmParse.js +596 -603
  10. package/dist/MorphologicalAnalysis/FsmParse.js.map +1 -1
  11. package/dist/MorphologicalAnalysis/FsmParseList.js +263 -273
  12. package/dist/MorphologicalAnalysis/FsmParseList.js.map +1 -1
  13. package/dist/MorphologicalAnalysis/InflectionalGroup.js +152 -162
  14. package/dist/MorphologicalAnalysis/InflectionalGroup.js.map +1 -1
  15. package/dist/MorphologicalAnalysis/MetamorphicParse.js +120 -129
  16. package/dist/MorphologicalAnalysis/MetamorphicParse.js.map +1 -1
  17. package/dist/MorphologicalAnalysis/MorphologicalParse.js +1037 -1046
  18. package/dist/MorphologicalAnalysis/MorphologicalParse.js.map +1 -1
  19. package/dist/MorphologicalAnalysis/MorphologicalTag.js +530 -540
  20. package/dist/MorphologicalAnalysis/MorphologicalTag.js.map +1 -1
  21. package/dist/MorphologicalAnalysis/MorphotacticEngine.js +230 -240
  22. package/dist/MorphologicalAnalysis/MorphotacticEngine.js.map +1 -1
  23. package/dist/MorphologicalAnalysis/State.js +54 -60
  24. package/dist/MorphologicalAnalysis/State.js.map +1 -1
  25. package/dist/MorphologicalAnalysis/Transition.js +408 -418
  26. package/dist/MorphologicalAnalysis/Transition.js.map +1 -1
  27. package/dist/index.js +19 -25
  28. package/dist/index.js.map +1 -1
  29. package/package.json +8 -7
  30. package/tests/FsmParseListTest.ts +3 -3
  31. package/tests/FsmParseTest.ts +1 -1
  32. package/tsconfig.json +4 -3
  33. package/turkish_dictionary.txt +9114 -9114
  34. package/source/tsconfig.json +0 -13
@@ -1,160 +1,200 @@
1
- (function (factory) {
2
- if (typeof module === "object" && typeof module.exports === "object") {
3
- var v = factory(require, exports);
4
- if (v !== undefined) module.exports = v;
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
5
7
  }
6
- else if (typeof define === "function" && define.amd) {
7
- define(["require", "exports", "nlptoolkit-dictionary/dist/Dictionary/Trie/Trie", "./FiniteStateMachine", "nlptoolkit-dictionary/dist/Dictionary/TxtDictionary", "nlptoolkit-datastructure/dist/LRUCache", "./FsmParseList", "nlptoolkit-dictionary/dist/Dictionary/WordComparator", "fs", "./Transition", "./MorphologicalTag", "nlptoolkit-dictionary/dist/Dictionary/TxtWord", "./FsmParse", "nlptoolkit-corpus/dist/Sentence", "nlptoolkit-dictionary/dist/Dictionary/Word", "./State", "nlptoolkit-datastructure/dist/Queue", "nlptoolkit-util/dist/FileUtils"], factory);
8
- }
9
- })(function (require, exports) {
10
- "use strict";
11
- Object.defineProperty(exports, "__esModule", { value: true });
12
- exports.FsmMorphologicalAnalyzer = void 0;
13
- const Trie_1 = require("nlptoolkit-dictionary/dist/Dictionary/Trie/Trie");
14
- const FiniteStateMachine_1 = require("./FiniteStateMachine");
15
- const TxtDictionary_1 = require("nlptoolkit-dictionary/dist/Dictionary/TxtDictionary");
16
- const LRUCache_1 = require("nlptoolkit-datastructure/dist/LRUCache");
17
- const FsmParseList_1 = require("./FsmParseList");
18
- const WordComparator_1 = require("nlptoolkit-dictionary/dist/Dictionary/WordComparator");
19
- const fs = require("fs");
20
- const Transition_1 = require("./Transition");
21
- const MorphologicalTag_1 = require("./MorphologicalTag");
22
- const TxtWord_1 = require("nlptoolkit-dictionary/dist/Dictionary/TxtWord");
23
- const FsmParse_1 = require("./FsmParse");
24
- const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
25
- const Word_1 = require("nlptoolkit-dictionary/dist/Dictionary/Word");
26
- const State_1 = require("./State");
27
- const Queue_1 = require("nlptoolkit-datastructure/dist/Queue");
28
- const FileUtils_1 = require("nlptoolkit-util/dist/FileUtils");
29
- class FsmMorphologicalAnalyzer {
30
- /**
31
- * Another constructor of FsmMorphologicalAnalyzer class. It generates a new TxtDictionary type dictionary from
32
- * given input dictionary, with given inputs fileName and cacheSize.
33
- *
34
- * @param fileName the file to read the finite state machine.
35
- * @param dictionaryFileNameOrDictionary the dictionary file that will be used to generate dictionaryTrie.
36
- * @param cacheSize the size of the LRUCache.
37
- */
38
- constructor(fileName, dictionaryFileNameOrDictionary, cacheSize) {
39
- this.parsedSurfaceForms = undefined;
40
- this.pronunciations = undefined;
41
- this.cache = undefined;
42
- this.mostUsedPatterns = new Map();
43
- if (dictionaryFileNameOrDictionary == undefined) {
44
- this.dictionary = new TxtDictionary_1.TxtDictionary();
45
- }
46
- else {
47
- if (dictionaryFileNameOrDictionary instanceof TxtDictionary_1.TxtDictionary) {
48
- this.dictionary = dictionaryFileNameOrDictionary;
49
- }
50
- else {
51
- this.dictionary = new TxtDictionary_1.TxtDictionary(WordComparator_1.WordComparator.TURKISH, dictionaryFileNameOrDictionary);
52
- }
53
- }
54
- if (fileName == undefined) {
55
- this.finiteStateMachine = new FiniteStateMachine_1.FiniteStateMachine();
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.FsmMorphologicalAnalyzer = void 0;
37
+ const Trie_1 = require("nlptoolkit-dictionary/dist/Dictionary/Trie/Trie");
38
+ const FiniteStateMachine_1 = require("./FiniteStateMachine");
39
+ const TxtDictionary_1 = require("nlptoolkit-dictionary/dist/Dictionary/TxtDictionary");
40
+ const LRUCache_1 = require("nlptoolkit-datastructure/dist/LRUCache");
41
+ const FsmParseList_1 = require("./FsmParseList");
42
+ const WordComparator_1 = require("nlptoolkit-dictionary/dist/Dictionary/WordComparator");
43
+ const fs = __importStar(require("fs"));
44
+ const Transition_1 = require("./Transition");
45
+ const MorphologicalTag_1 = require("./MorphologicalTag");
46
+ const TxtWord_1 = require("nlptoolkit-dictionary/dist/Dictionary/TxtWord");
47
+ const FsmParse_1 = require("./FsmParse");
48
+ const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
49
+ const Word_1 = require("nlptoolkit-dictionary/dist/Dictionary/Word");
50
+ const State_1 = require("./State");
51
+ const Queue_1 = require("nlptoolkit-datastructure/dist/Queue");
52
+ const FileUtils_1 = require("nlptoolkit-util/dist/FileUtils");
53
+ class FsmMorphologicalAnalyzer {
54
+ dictionaryTrie;
55
+ suffixTrie;
56
+ parsedSurfaceForms = undefined;
57
+ pronunciations = undefined;
58
+ finiteStateMachine;
59
+ static MAX_DISTANCE = 2;
60
+ dictionary;
61
+ cache = undefined;
62
+ mostUsedPatterns = new Map();
63
+ /**
64
+ * Another constructor of FsmMorphologicalAnalyzer class. It generates a new TxtDictionary type dictionary from
65
+ * given input dictionary, with given inputs fileName and cacheSize.
66
+ *
67
+ * @param fileName the file to read the finite state machine.
68
+ * @param dictionaryFileNameOrDictionary the dictionary file that will be used to generate dictionaryTrie.
69
+ * @param cacheSize the size of the LRUCache.
70
+ */
71
+ constructor(fileName, dictionaryFileNameOrDictionary, cacheSize) {
72
+ if (dictionaryFileNameOrDictionary == undefined) {
73
+ this.dictionary = new TxtDictionary_1.TxtDictionary();
74
+ }
75
+ else {
76
+ if (dictionaryFileNameOrDictionary instanceof TxtDictionary_1.TxtDictionary) {
77
+ this.dictionary = dictionaryFileNameOrDictionary;
56
78
  }
57
79
  else {
58
- this.finiteStateMachine = new FiniteStateMachine_1.FiniteStateMachine(fileName);
59
- }
60
- this.prepareSuffixTrie();
61
- this.dictionaryTrie = this.dictionary.prepareTrie();
62
- if (cacheSize > 0) {
63
- this.cache = new LRUCache_1.LRUCache(cacheSize);
64
- }
65
- this.addPronunciations("pronunciations.txt");
66
- }
67
- /**
68
- * Constructs and returns the reverse string of a given string.
69
- * @param s String to be reversed.
70
- * @return Reverse of a given string.
71
- */
72
- reverseString(s) {
73
- let result = "";
74
- for (let i = s.length - 1; i >= 0; i--) {
75
- result += s[i];
80
+ this.dictionary = new TxtDictionary_1.TxtDictionary(WordComparator_1.WordComparator.TURKISH, dictionaryFileNameOrDictionary);
76
81
  }
82
+ }
83
+ if (fileName == undefined) {
84
+ this.finiteStateMachine = new FiniteStateMachine_1.FiniteStateMachine();
85
+ }
86
+ else {
87
+ this.finiteStateMachine = new FiniteStateMachine_1.FiniteStateMachine(fileName);
88
+ }
89
+ this.prepareSuffixTrie();
90
+ this.dictionaryTrie = this.dictionary.prepareTrie();
91
+ if (cacheSize > 0) {
92
+ this.cache = new LRUCache_1.LRUCache(cacheSize);
93
+ }
94
+ this.addPronunciations("pronunciations.txt");
95
+ }
96
+ /**
97
+ * Constructs and returns the reverse string of a given string.
98
+ * @param s String to be reversed.
99
+ * @return Reverse of a given string.
100
+ */
101
+ reverseString(s) {
102
+ let result = "";
103
+ for (let i = s.length - 1; i >= 0; i--) {
104
+ result += s[i];
105
+ }
106
+ return result;
107
+ }
108
+ /**
109
+ * Constructs the suffix trie from the input file suffixes.txt. suffixes.txt contains the most frequent 6000
110
+ * suffixes that a verb or a noun can take. The suffix trie is a trie that stores these suffixes in reverse form,
111
+ * which can be then used to match a given word for its possible suffix content.
112
+ */
113
+ prepareSuffixTrie() {
114
+ this.suffixTrie = new Trie_1.Trie();
115
+ let data = fs.readFileSync("suffixes.txt", 'utf8');
116
+ let lines = data.split("\n");
117
+ for (let suffix of lines) {
118
+ let reverseSuffix = this.reverseString(suffix);
119
+ this.suffixTrie.addWord(reverseSuffix, new Word_1.Word(reverseSuffix));
120
+ }
121
+ }
122
+ /**
123
+ * Reads the file for correct surface forms and their most frequent root forms, in other words, the surface forms
124
+ * which have at least one morphological analysis in Turkish.
125
+ * @param fileName Input file containing analyzable surface forms and their root forms.
126
+ */
127
+ addParsedSurfaceForms(fileName) {
128
+ this.parsedSurfaceForms = FileUtils_1.FileUtils.readHashMap(fileName);
129
+ }
130
+ /**
131
+ * Reads the file for foreign words and their pronunciations.
132
+ * @param fileName Input file containing foreign words and their pronunciations.
133
+ */
134
+ addPronunciations(fileName) {
135
+ this.pronunciations = FileUtils_1.FileUtils.readHashMap(fileName);
136
+ }
137
+ /**
138
+ * The getPossibleWords method takes {@link MorphologicalParse} and {@link MetamorphicParse} as input.
139
+ * First it determines whether the given morphologicalParse is the root verb and whether it contains a verb tag.
140
+ * Then it creates new transition with -mak and creates a new {@link Set} result.
141
+ * <p>
142
+ * It takes the given {@link MetamorphicParse} input as currentWord and if there is a compound word starting with the
143
+ * currentWord, it gets this compoundWord from dictionaryTrie. If there is a compoundWord and the difference of the
144
+ * currentWord and compoundWord lengths is less than 3, then compoundWord is added to the result; currentWord is added as well.
145
+ * <p>
146
+ * Then it gets the root from parse input as a currentRoot. If it is not null, and morphologicalParse input is verb,
147
+ * it directly adds the verb to result after making transition to currentRoot with currentWord String. Else, it creates a new
148
+ * transition with -lar and make this transition then adds to the result.
149
+ *
150
+ * @param morphologicalParse {@link MorphologicalParse} type input.
151
+ * @param metamorphicParse {@link MetamorphicParse} type input.
152
+ * @return {@link Set} result.
153
+ */
154
+ getPossibleWords(morphologicalParse, metamorphicParse) {
155
+ let isRootVerb = morphologicalParse.getRootPos() == "VERB";
156
+ let containsVerb = morphologicalParse.containsTag(MorphologicalTag_1.MorphologicalTag.VERB);
157
+ let verbTransition = new Transition_1.Transition("mAk");
158
+ let result = new Set();
159
+ if (metamorphicParse == undefined || metamorphicParse.getWord() == undefined) {
77
160
  return result;
78
161
  }
79
- /**
80
- * Constructs the suffix trie from the input file suffixes.txt. suffixes.txt contains the most frequent 6000
81
- * suffixes that a verb or a noun can take. The suffix trie is a trie that stores these suffixes in reverse form,
82
- * which can be then used to match a given word for its possible suffix content.
83
- */
84
- prepareSuffixTrie() {
85
- this.suffixTrie = new Trie_1.Trie();
86
- let data = fs.readFileSync("suffixes.txt", 'utf8');
87
- let lines = data.split("\n");
88
- for (let suffix of lines) {
89
- let reverseSuffix = this.reverseString(suffix);
90
- this.suffixTrie.addWord(reverseSuffix, new Word_1.Word(reverseSuffix));
91
- }
162
+ let currentWord = metamorphicParse.getWord().getName();
163
+ let pluralIndex = -1;
164
+ let compoundWord = this.dictionaryTrie.getCompoundWordStartingWith(currentWord);
165
+ if (!isRootVerb) {
166
+ if (compoundWord != null && compoundWord.getName().length - currentWord.length < 3) {
167
+ result.add(compoundWord.getName());
168
+ }
169
+ result.add(currentWord);
92
170
  }
93
- /**
94
- * Reads the file for correct surface forms and their most frequent root forms, in other words, the surface forms
95
- * which have at least one morphological analysis in Turkish.
96
- * @param fileName Input file containing analyzable surface forms and their root forms.
97
- */
98
- addParsedSurfaceForms(fileName) {
99
- this.parsedSurfaceForms = FileUtils_1.FileUtils.readHashMap(fileName);
100
- }
101
- /**
102
- * Reads the file for foreign words and their pronunciations.
103
- * @param fileName Input file containing foreign words and their pronunciations.
104
- */
105
- addPronunciations(fileName) {
106
- this.pronunciations = FileUtils_1.FileUtils.readHashMap(fileName);
107
- }
108
- /**
109
- * The getPossibleWords method takes {@link MorphologicalParse} and {@link MetamorphicParse} as input.
110
- * First it determines whether the given morphologicalParse is the root verb and whether it contains a verb tag.
111
- * Then it creates new transition with -mak and creates a new {@link Set} result.
112
- * <p>
113
- * It takes the given {@link MetamorphicParse} input as currentWord and if there is a compound word starting with the
114
- * currentWord, it gets this compoundWord from dictionaryTrie. If there is a compoundWord and the difference of the
115
- * currentWord and compundWords is less than 3 than compoundWord is added to the result, otherwise currentWord is added.
116
- * <p>
117
- * Then it gets the root from parse input as a currentRoot. If it is not null, and morphologicalParse input is verb,
118
- * it directly adds the verb to result after making transition to currentRoot with currentWord String. Else, it creates a new
119
- * transition with -lar and make this transition then adds to the result.
120
- *
121
- * @param morphologicalParse {@link MorphologicalParse} type input.
122
- * @param metamorphicParse {@link MetamorphicParse} type input.
123
- * @return {@link HashSet} result.
124
- */
125
- getPossibleWords(morphologicalParse, metamorphicParse) {
126
- let isRootVerb = morphologicalParse.getRootPos() == "VERB";
127
- let containsVerb = morphologicalParse.containsTag(MorphologicalTag_1.MorphologicalTag.VERB);
128
- let verbTransition = new Transition_1.Transition("mAk");
129
- let result = new Set();
130
- if (metamorphicParse == undefined || metamorphicParse.getWord() == undefined) {
131
- return result;
132
- }
133
- let currentWord = metamorphicParse.getWord().getName();
134
- let pluralIndex = -1;
135
- let compoundWord = this.dictionaryTrie.getCompoundWordStartingWith(currentWord);
136
- if (!isRootVerb) {
137
- if (compoundWord != null && compoundWord.getName().length - currentWord.length < 3) {
138
- result.add(compoundWord.getName());
171
+ let currentRoot = this.dictionary.getWord(metamorphicParse.getWord().getName());
172
+ if (currentRoot == undefined && compoundWord != undefined) {
173
+ currentRoot = compoundWord;
174
+ }
175
+ if (currentRoot != undefined) {
176
+ if (isRootVerb) {
177
+ let verbWord = verbTransition.makeTransition(currentRoot, currentWord);
178
+ result.add(verbWord);
179
+ }
180
+ let pluralWord = undefined;
181
+ for (let i = 1; i < metamorphicParse.size(); i++) {
182
+ let transition = new Transition_1.Transition(metamorphicParse.getMetaMorpheme(i), undefined, undefined);
183
+ if (metamorphicParse.getMetaMorpheme(i) == "lAr") {
184
+ pluralWord = currentWord;
185
+ pluralIndex = i + 1;
139
186
  }
187
+ currentWord = transition.makeTransition(currentRoot, currentWord);
140
188
  result.add(currentWord);
141
- }
142
- let currentRoot = this.dictionary.getWord(metamorphicParse.getWord().getName());
143
- if (currentRoot == undefined && compoundWord != undefined) {
144
- currentRoot = compoundWord;
145
- }
146
- if (currentRoot != undefined) {
147
- if (isRootVerb) {
189
+ if (containsVerb) {
148
190
  let verbWord = verbTransition.makeTransition(currentRoot, currentWord);
149
191
  result.add(verbWord);
150
192
  }
151
- let pluralWord = undefined;
152
- for (let i = 1; i < metamorphicParse.size(); i++) {
193
+ }
194
+ if (pluralWord != null) {
195
+ currentWord = pluralWord;
196
+ for (let i = pluralIndex; i < metamorphicParse.size(); i++) {
153
197
  let transition = new Transition_1.Transition(metamorphicParse.getMetaMorpheme(i), undefined, undefined);
154
- if (metamorphicParse.getMetaMorpheme(i) == "lAr") {
155
- pluralWord = currentWord;
156
- pluralIndex = i + 1;
157
- }
158
198
  currentWord = transition.makeTransition(currentRoot, currentWord);
159
199
  result.add(currentWord);
160
200
  if (containsVerb) {
@@ -162,1212 +202,1200 @@
162
202
  result.add(verbWord);
163
203
  }
164
204
  }
165
- if (pluralWord != null) {
166
- currentWord = pluralWord;
167
- for (let i = pluralIndex; i < metamorphicParse.size(); i++) {
168
- let transition = new Transition_1.Transition(metamorphicParse.getMetaMorpheme(i), undefined, undefined);
169
- currentWord = transition.makeTransition(currentRoot, currentWord);
170
- result.add(currentWord);
171
- if (containsVerb) {
172
- let verbWord = verbTransition.makeTransition(currentRoot, currentWord);
173
- result.add(verbWord);
174
- }
175
- }
176
- }
177
205
  }
178
- return result;
179
206
  }
180
- /**
181
- * The getDictionary method is used to get TxtDictionary.
182
- *
183
- * @return TxtDictionary type dictionary.
184
- */
185
- getDictionary() {
186
- return this.dictionary;
187
- }
188
- /**
189
- * The getFiniteStateMachine method is used to get FiniteStateMachine.
190
- *
191
- * @return FiniteStateMachine type finiteStateMachine.
192
- */
193
- getFiniteStateMachine() {
194
- return this.finiteStateMachine;
195
- }
196
- /**
197
- * The isPossibleSubstring method first checks whether given short and long strings are equal to root word.
198
- * Then, compares both short and long strings' chars till the last two chars of short string. In the presence of mismatch,
199
- * false is returned. On the other hand, it counts the distance between two strings until it becomes greater than 2,
200
- * which is the MAX_DISTANCE also finds the index of the last char.
201
- * <p>
202
- * If the substring is a rootWord and equals to 'ben', which is a special case or root holds the lastIdropsDuringSuffixation or
203
- * lastIdropsDuringPassiveSuffixation conditions, then it returns true if distance is not greater than MAX_DISTANCE.
204
- * <p>
205
- * On the other hand, if the shortStrong ends with one of these chars 'e, a, p, ç, t, k' and 't 's a rootWord with
206
- * the conditions of rootSoftenDuringSuffixation, vowelEChangesToIDuringYSuffixation, vowelAChangesToIDuringYSuffixation
207
- * or endingKChangesIntoG then it returns true if the last index is not equal to 2 and distance is not greater than
208
- * MAX_DISTANCE and false otherwise.
209
- *
210
- * @param shortString the possible substring.
211
- * @param longString the long string to compare with substring.
212
- * @param root the root of the long string.
213
- * @return true if given substring is the actual substring of the longString, false otherwise.
214
- */
215
- isPossibleSubstring(shortString, longString, root) {
216
- let rootWord = ((shortString == root.getName()) || longString == root.getName());
217
- let distance = 0, last = 1;
218
- for (let j = 0; j < shortString.length; j++) {
219
- if (shortString.charAt(j) != longString.charAt(j)) {
220
- if (j < shortString.length - 2) {
221
- return false;
222
- }
223
- last = shortString.length - j;
224
- distance++;
225
- if (distance > FsmMorphologicalAnalyzer.MAX_DISTANCE) {
226
- break;
227
- }
207
+ return result;
208
+ }
209
+ /**
210
+ * The getDictionary method is used to get TxtDictionary.
211
+ *
212
+ * @return TxtDictionary type dictionary.
213
+ */
214
+ getDictionary() {
215
+ return this.dictionary;
216
+ }
217
+ /**
218
+ * The getFiniteStateMachine method is used to get FiniteStateMachine.
219
+ *
220
+ * @return FiniteStateMachine type finiteStateMachine.
221
+ */
222
+ getFiniteStateMachine() {
223
+ return this.finiteStateMachine;
224
+ }
225
+ /**
226
+ * The isPossibleSubstring method first checks whether given short and long strings are equal to root word.
227
+ * Then, compares both short and long strings' chars till the last two chars of short string. In the presence of mismatch,
228
+ * false is returned. On the other hand, it counts the distance between two strings until it becomes greater than 2,
229
+ * which is the MAX_DISTANCE also finds the index of the last char.
230
+ * <p>
231
+ * If the substring is a rootWord and equals to 'ben', which is a special case or root holds the lastIdropsDuringSuffixation or
232
+ * lastIdropsDuringPassiveSuffixation conditions, then it returns true if distance is not greater than MAX_DISTANCE.
233
+ * <p>
234
+ * On the other hand, if the shortString ends with one of these chars 'e, a, p, ç, t, k' or it is a rootWord with
235
+ * the conditions of rootSoftenDuringSuffixation, vowelEChangesToIDuringYSuffixation, vowelAChangesToIDuringYSuffixation
236
+ * or endingKChangesIntoG then it returns true if the last index is not equal to 2 and distance is not greater than
237
+ * MAX_DISTANCE - 1 and false otherwise.
238
+ *
239
+ * @param shortString the possible substring.
240
+ * @param longString the long string to compare with substring.
241
+ * @param root the root of the long string.
242
+ * @return true if given substring is the actual substring of the longString, false otherwise.
243
+ */
244
+ isPossibleSubstring(shortString, longString, root) {
245
+ let rootWord = ((shortString == root.getName()) || longString == root.getName());
246
+ let distance = 0, last = 1;
247
+ for (let j = 0; j < shortString.length; j++) {
248
+ if (shortString.charAt(j) != longString.charAt(j)) {
249
+ if (j < shortString.length - 2) {
250
+ return false;
251
+ }
252
+ last = shortString.length - j;
253
+ distance++;
254
+ if (distance > FsmMorphologicalAnalyzer.MAX_DISTANCE) {
255
+ break;
228
256
  }
229
257
  }
230
- if (rootWord && (root.getName() == "ben" || root.getName() == "sen" ||
231
- root.lastIdropsDuringSuffixation() || root.lastIdropsDuringPassiveSuffixation())) {
232
- return (distance <= FsmMorphologicalAnalyzer.MAX_DISTANCE);
258
+ }
259
+ if (rootWord && (root.getName() == "ben" || root.getName() == "sen" ||
260
+ root.lastIdropsDuringSuffixation() || root.lastIdropsDuringPassiveSuffixation())) {
261
+ return (distance <= FsmMorphologicalAnalyzer.MAX_DISTANCE);
262
+ }
263
+ else {
264
+ if (shortString.endsWith("e") || shortString.endsWith("a") || shortString.endsWith("p") ||
265
+ shortString.endsWith("ç") || shortString.endsWith("t") || shortString.endsWith("k") ||
266
+ (rootWord && (root.rootSoftenDuringSuffixation() || root.vowelEChangesToIDuringYSuffixation() ||
267
+ root.vowelAChangesToIDuringYSuffixation() || root.endingKChangesIntoG()))) {
268
+ return (last != 2 && distance <= FsmMorphologicalAnalyzer.MAX_DISTANCE - 1);
233
269
  }
234
270
  else {
235
- if (shortString.endsWith("e") || shortString.endsWith("a") || shortString.endsWith("p") ||
236
- shortString.endsWith("ç") || shortString.endsWith("t") || shortString.endsWith("k") ||
237
- (rootWord && (root.rootSoftenDuringSuffixation() || root.vowelEChangesToIDuringYSuffixation() ||
238
- root.vowelAChangesToIDuringYSuffixation() || root.endingKChangesIntoG()))) {
239
- return (last != 2 && distance <= FsmMorphologicalAnalyzer.MAX_DISTANCE - 1);
240
- }
241
- else {
242
- return (distance <= FsmMorphologicalAnalyzer.MAX_DISTANCE - 2);
243
- }
271
+ return (distance <= FsmMorphologicalAnalyzer.MAX_DISTANCE - 2);
244
272
  }
245
273
  }
246
- /**
247
- * The initializeParseList method initializes the given given fsm ArrayList with given root words by parsing them.
248
- * <p>
249
- * It checks many conditions;
250
- * isPlural; if root holds the condition then it gets the state with the name of NominalRootPlural, then
251
- * creates a new parsing and adds this to the input fsmParse Arraylist.
252
- * Ex : Açıktohumlular
253
- * <p>
254
- * !isPlural and isPortmanteauEndingWithSI, if root holds the conditions then it gets the state with the
255
- * name of NominalRootNoPossesive.
256
- * Ex : Balarısı
257
- * <p>
258
- * !isPlural and isPortmanteau, if root holds the conditions then it gets the state with the name of
259
- * CompoundNounRoot.
260
- * Ex : Aslanağızı
261
- * <p>
262
- * !isPlural, !isPortmanteau and isHeader, if root holds the conditions then it gets the state with the
263
- * name of HeaderRoot.
264
- * Ex : </title>
265
- * <p>
266
- * !isPlural, !isPortmanteau and isInterjection, if root holds the conditions then it gets the state
267
- * with the name of InterjectionRoot.
268
- * Ex : Hey, Aa
269
- * <p>
270
- * !isPlural, !isPortmanteau and isDuplicate, if root holds the conditions then it gets the state
271
- * with the name of DuplicateRoot.
272
- * Ex : Allak,
273
- * !isPlural, !isPortmanteau and isCode, if root holds the conditions then it gets the state
274
- * with the name of CodeRoot.
275
- * Ex : 9400f,
276
- * <p>
277
- * !isPlural, !isPortmanteau and isMetric, if root holds the conditions then it gets the state
278
- * with the name of MetricRoot.
279
- * Ex : 11x8x12,
280
- * <p>
281
- * !isPlural, !isPortmanteau and isNumeral, if root holds the conditions then it gets the state
282
- * with the name of CardinalRoot.
283
- * Ex : Yüz, bin
284
- * <p>
285
- * !isPlural, !isPortmanteau and isReal, if root holds the conditions then it gets the state
286
- * with the name of RealRoot.
287
- * Ex : 1.2
288
- * <p>
289
- * !isPlural, !isPortmanteau and isFraction, if root holds the conditions then it gets the state
290
- * with the name of FractionRoot.
291
- * Ex : 1/2
292
- * <p>
293
- * !isPlural, !isPortmanteau and isDate, if root holds the conditions then it gets the state
294
- * with the name of DateRoot.
295
- * Ex : 11/06/2018
296
- * <p>
297
- * !isPlural, !isPortmanteau and isPercent, if root holds the conditions then it gets the state
298
- * with the name of PercentRoot.
299
- * Ex : %12.5
300
- * <p>
301
- * !isPlural, !isPortmanteau and isRange, if root holds the conditions then it gets the state
302
- * with the name of RangeRoot.
303
- * Ex : 3-5
304
- * <p>
305
- * !isPlural, !isPortmanteau and isTime, if root holds the conditions then it gets the state
306
- * with the name of TimeRoot.
307
- * Ex : 13:16:08
308
- * <p>
309
- * !isPlural, !isPortmanteau and isOrdinal, if root holds the conditions then it gets the state
310
- * with the name of OrdinalRoot.
311
- * Ex : Altıncı
312
- * <p>
313
- * !isPlural, !isPortmanteau, and isVerb if root holds the conditions then it gets the state
314
- * with the name of VerbalRoot. Or isPassive, then it gets the state with the name of PassiveHn.
315
- * Ex : Anla (!isPAssive)
316
- * Ex : Çağrıl (isPassive)
317
- * <p>
318
- * !isPlural, !isPortmanteau and isPronoun, if root holds the conditions then it gets the state
319
- * with the name of PronounRoot. There are 6 different Pronoun state names, REFLEX, QUANT, QUANTPLURAL, DEMONS, PERS, QUES.
320
- * REFLEX = Reflexive Pronouns Ex : kendi
321
- * QUANT = Quantitative Pronouns Ex : öbür, hep, kimse, hiçbiri, bazı, kimi, biri
322
- * QUANTPLURAL = Quantitative Plural Pronouns Ex : tümü, çoğu, hepsi
323
- * DEMONS = Demonstrative Pronouns Ex : o, bu, şu
324
- * PERS = Personal Pronouns Ex : ben, sen, o, biz, siz, onlar
325
- * QUES = Interrogatıve Pronouns Ex : nere, ne, kim, hangi
326
- * <p>
327
- * !isPlural, !isPortmanteau and isAdjective, if root holds the conditions then it gets the state
328
- * with the name of AdjectiveRoot.
329
- * Ex : Absürt, Abes
330
- * <p>
331
- * !isPlural, !isPortmanteau and isPureAdjective, if root holds the conditions then it gets the state
332
- * with the name of Adjective.
333
- * Ex : Geçmiş, Cam
334
- * <p>
335
- * !isPlural, !isPortmanteau and isNominal, if root holds the conditions then it gets the state
336
- * with the name of NominalRoot.
337
- * Ex : Görüş
338
- * <p>
339
- * !isPlural, !isPortmanteau and isProper, if root holds the conditions then it gets the state
340
- * with the name of ProperRoot.
341
- * Ex : Abdi
342
- * <p>
343
- * !isPlural, !isPortmanteau and isQuestion, if root holds the conditions then it gets the state
344
- * with the name of QuestionRoot.
345
- * Ex : Mi,
346
- * <p>
347
- * !isPlural, !isPortmanteau and isDeterminer, if root holds the conditions then it gets the state
348
- * with the name of DeterminerRoot.
349
- * Ex : Çok, bir
350
- * <p>
351
- * !isPlural, !isPortmanteau and isConjunction, if root holds the conditions then it gets the state
352
- * with the name of ConjunctionRoot.
353
- * Ex : Ama , ancak
354
- * <p>
355
- * !isPlural, !isPortmanteau and isPostP, if root holds the conditions then it gets the state
356
- * with the name of PostP.
357
- * Ex : Ait, dair
358
- * <p>
359
- * !isPlural, !isPortmanteau and isAdverb, if root holds the conditions then it gets the state
360
- * with the name of AdverbRoot.
361
- * Ex : Acilen
362
- *
363
- * @param fsmParse ArrayList to initialize.
364
- * @param root word to check properties and add to fsmParse according to them.
365
- * @param isProper is used to check a word is proper or not.
366
- */
367
- initializeParseList(fsmParse, root, isProper) {
368
- let currentFsmParse;
369
- if (root.isPlural()) {
370
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRootPlural"));
274
+ }
275
+ /**
276
+ * The initializeParseList method initializes the given fsm ArrayList with the given root words by parsing them.
277
+ * <p>
278
+ * It checks many conditions;
279
+ * isPlural; if root holds the condition then it gets the state with the name of NominalRootPlural, then
280
+ * creates a new parsing and adds this to the input fsmParse Arraylist.
281
+ * Ex : Açıktohumlular
282
+ * <p>
283
+ * !isPlural and isPortmanteauEndingWithSI, if root holds the conditions then it gets the state with the
284
+ * name of NominalRootNoPossesive.
285
+ * Ex : Balarısı
286
+ * <p>
287
+ * !isPlural and isPortmanteau, if root holds the conditions then it gets the state with the name of
288
+ * CompoundNounRoot.
289
+ * Ex : Aslanağızı
290
+ * <p>
291
+ * !isPlural, !isPortmanteau and isHeader, if root holds the conditions then it gets the state with the
292
+ * name of HeaderRoot.
293
+ * Ex : </title>
294
+ * <p>
295
+ * !isPlural, !isPortmanteau and isInterjection, if root holds the conditions then it gets the state
296
+ * with the name of InterjectionRoot.
297
+ * Ex : Hey, Aa
298
+ * <p>
299
+ * !isPlural, !isPortmanteau and isDuplicate, if root holds the conditions then it gets the state
300
+ * with the name of DuplicateRoot.
301
+ * Ex : Allak,
302
+ * !isPlural, !isPortmanteau and isCode, if root holds the conditions then it gets the state
303
+ * with the name of CodeRoot.
304
+ * Ex : 9400f,
305
+ * <p>
306
+ * !isPlural, !isPortmanteau and isMetric, if root holds the conditions then it gets the state
307
+ * with the name of MetricRoot.
308
+ * Ex : 11x8x12,
309
+ * <p>
310
+ * !isPlural, !isPortmanteau and isNumeral, if root holds the conditions then it gets the state
311
+ * with the name of CardinalRoot.
312
+ * Ex : Yüz, bin
313
+ * <p>
314
+ * !isPlural, !isPortmanteau and isReal, if root holds the conditions then it gets the state
315
+ * with the name of RealRoot.
316
+ * Ex : 1.2
317
+ * <p>
318
+ * !isPlural, !isPortmanteau and isFraction, if root holds the conditions then it gets the state
319
+ * with the name of FractionRoot.
320
+ * Ex : 1/2
321
+ * <p>
322
+ * !isPlural, !isPortmanteau and isDate, if root holds the conditions then it gets the state
323
+ * with the name of DateRoot.
324
+ * Ex : 11/06/2018
325
+ * <p>
326
+ * !isPlural, !isPortmanteau and isPercent, if root holds the conditions then it gets the state
327
+ * with the name of PercentRoot.
328
+ * Ex : %12.5
329
+ * <p>
330
+ * !isPlural, !isPortmanteau and isRange, if root holds the conditions then it gets the state
331
+ * with the name of RangeRoot.
332
+ * Ex : 3-5
333
+ * <p>
334
+ * !isPlural, !isPortmanteau and isTime, if root holds the conditions then it gets the state
335
+ * with the name of TimeRoot.
336
+ * Ex : 13:16:08
337
+ * <p>
338
+ * !isPlural, !isPortmanteau and isOrdinal, if root holds the conditions then it gets the state
339
+ * with the name of OrdinalRoot.
340
+ * Ex : Altıncı
341
+ * <p>
342
+ * !isPlural, !isPortmanteau, and isVerb if root holds the conditions then it gets the state
343
+ * with the name of VerbalRoot. Or isPassive, then it gets the state with the name of PassiveHn.
344
+ * Ex : Anla (!isPassive)
345
+ * Ex : Çağrıl (isPassive)
346
+ * <p>
347
+ * !isPlural, !isPortmanteau and isPronoun, if root holds the conditions then it gets the state
348
+ * with the name of PronounRoot. There are 6 different Pronoun state names, REFLEX, QUANT, QUANTPLURAL, DEMONS, PERS, QUES.
349
+ * REFLEX = Reflexive Pronouns Ex : kendi
350
+ * QUANT = Quantitative Pronouns Ex : öbür, hep, kimse, hiçbiri, bazı, kimi, biri
351
+ * QUANTPLURAL = Quantitative Plural Pronouns Ex : tümü, çoğu, hepsi
352
+ * DEMONS = Demonstrative Pronouns Ex : o, bu, şu
353
+ * PERS = Personal Pronouns Ex : ben, sen, o, biz, siz, onlar
354
+ * QUES = Interrogative Pronouns Ex : nere, ne, kim, hangi
355
+ * <p>
356
+ * !isPlural, !isPortmanteau and isAdjective, if root holds the conditions then it gets the state
357
+ * with the name of AdjectiveRoot.
358
+ * Ex : Absürt, Abes
359
+ * <p>
360
+ * !isPlural, !isPortmanteau and isPureAdjective, if root holds the conditions then it gets the state
361
+ * with the name of Adjective.
362
+ * Ex : Geçmiş, Cam
363
+ * <p>
364
+ * !isPlural, !isPortmanteau and isNominal, if root holds the conditions then it gets the state
365
+ * with the name of NominalRoot.
366
+ * Ex : Görüş
367
+ * <p>
368
+ * !isPlural, !isPortmanteau and isProper, if root holds the conditions then it gets the state
369
+ * with the name of ProperRoot.
370
+ * Ex : Abdi
371
+ * <p>
372
+ * !isPlural, !isPortmanteau and isQuestion, if root holds the conditions then it gets the state
373
+ * with the name of QuestionRoot.
374
+ * Ex : Mi, mü
375
+ * <p>
376
+ * !isPlural, !isPortmanteau and isDeterminer, if root holds the conditions then it gets the state
377
+ * with the name of DeterminerRoot.
378
+ * Ex : Çok, bir
379
+ * <p>
380
+ * !isPlural, !isPortmanteau and isConjunction, if root holds the conditions then it gets the state
381
+ * with the name of ConjunctionRoot.
382
+ * Ex : Ama , ancak
383
+ * <p>
384
+ * !isPlural, !isPortmanteau and isPostP, if root holds the conditions then it gets the state
385
+ * with the name of PostP.
386
+ * Ex : Ait, dair
387
+ * <p>
388
+ * !isPlural, !isPortmanteau and isAdverb, if root holds the conditions then it gets the state
389
+ * with the name of AdverbRoot.
390
+ * Ex : Acilen
391
+ *
392
+ * @param fsmParse ArrayList to initialize.
393
+ * @param root word to check properties and add to fsmParse according to them.
394
+ * @param isProper is used to check a word is proper or not.
395
+ */
396
+ initializeParseList(fsmParse, root, isProper) {
397
+ let currentFsmParse;
398
+ if (root.isPlural()) {
399
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRootPlural"));
400
+ fsmParse.push(currentFsmParse);
401
+ }
402
+ else {
403
+ if (root.isPortmanteauEndingWithSI()) {
404
+ currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2), this.finiteStateMachine.getState("CompoundNounRoot"));
405
+ fsmParse.push(currentFsmParse);
406
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRootNoPossesive"));
371
407
  fsmParse.push(currentFsmParse);
372
408
  }
373
409
  else {
374
- if (root.isPortmanteauEndingWithSI()) {
375
- currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2), this.finiteStateMachine.getState("CompoundNounRoot"));
376
- fsmParse.push(currentFsmParse);
377
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRootNoPossesive"));
378
- fsmParse.push(currentFsmParse);
379
- }
380
- else {
381
- if (root.isPortmanteau()) {
382
- if (root.isPortmanteauFacedVowelEllipsis()) {
410
+ if (root.isPortmanteau()) {
411
+ if (root.isPortmanteauFacedVowelEllipsis()) {
412
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRootNoPossesive"));
413
+ fsmParse.push(currentFsmParse);
414
+ currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2) + root.getName().charAt(root.getName().length - 1) + root.getName().charAt(root.getName().length - 2), this.finiteStateMachine.getState("CompoundNounRoot"));
415
+ }
416
+ else {
417
+ if (root.isPortmanteauFacedSoftening()) {
383
418
  currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRootNoPossesive"));
384
419
  fsmParse.push(currentFsmParse);
385
- currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2) + root.getName().charAt(root.getName().length - 1) + root.getName().charAt(root.getName().length - 2), this.finiteStateMachine.getState("CompoundNounRoot"));
420
+ switch (root.getName().charAt(root.getName().length - 2)) {
421
+ case 'b':
422
+ currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2) + 'p', this.finiteStateMachine.getState("CompoundNounRoot"));
423
+ break;
424
+ case 'c':
425
+ currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2) + 'ç', this.finiteStateMachine.getState("CompoundNounRoot"));
426
+ break;
427
+ case 'd':
428
+ currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2) + 't', this.finiteStateMachine.getState("CompoundNounRoot"));
429
+ break;
430
+ case 'ğ':
431
+ currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2) + 'k', this.finiteStateMachine.getState("CompoundNounRoot"));
432
+ break;
433
+ default:
434
+ currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 1), this.finiteStateMachine.getState("CompoundNounRoot"));
435
+ }
386
436
  }
387
437
  else {
388
- if (root.isPortmanteauFacedSoftening()) {
389
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRootNoPossesive"));
390
- fsmParse.push(currentFsmParse);
391
- switch (root.getName().charAt(root.getName().length - 2)) {
392
- case 'b':
393
- currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2) + 'p', this.finiteStateMachine.getState("CompoundNounRoot"));
394
- break;
395
- case 'c':
396
- currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2) + 'ç', this.finiteStateMachine.getState("CompoundNounRoot"));
397
- break;
398
- case 'd':
399
- currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2) + 't', this.finiteStateMachine.getState("CompoundNounRoot"));
400
- break;
401
- case 'ğ':
402
- currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 2) + 'k', this.finiteStateMachine.getState("CompoundNounRoot"));
403
- break;
404
- default:
405
- currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 1), this.finiteStateMachine.getState("CompoundNounRoot"));
406
- }
407
- }
408
- else {
409
- currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 1), this.finiteStateMachine.getState("CompoundNounRoot"));
410
- }
438
+ currentFsmParse = new FsmParse_1.FsmParse(root.getName().substring(0, root.getName().length - 1), this.finiteStateMachine.getState("CompoundNounRoot"));
411
439
  }
440
+ }
441
+ fsmParse.push(currentFsmParse);
442
+ }
443
+ else {
444
+ if (root.isHeader()) {
445
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("HeaderRoot"));
412
446
  fsmParse.push(currentFsmParse);
413
447
  }
414
- else {
415
- if (root.isHeader()) {
416
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("HeaderRoot"));
417
- fsmParse.push(currentFsmParse);
418
- }
419
- if (root.isInterjection()) {
420
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("InterjectionRoot"));
421
- fsmParse.push(currentFsmParse);
422
- }
423
- if (root.isDuplicate()) {
424
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("DuplicateRoot"));
425
- fsmParse.push(currentFsmParse);
426
- }
427
- if (root.isCode()) {
428
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("CodeRoot"));
429
- fsmParse.push(currentFsmParse);
430
- }
431
- if (root.isMetric()) {
432
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("MetricRoot"));
433
- fsmParse.push(currentFsmParse);
434
- }
435
- if (root.isNumeral()) {
436
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("CardinalRoot"));
437
- fsmParse.push(currentFsmParse);
438
- }
439
- if (root.isReal()) {
440
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("RealRoot"));
441
- fsmParse.push(currentFsmParse);
442
- }
443
- if (root.isFraction()) {
444
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("FractionRoot"));
445
- fsmParse.push(currentFsmParse);
446
- }
447
- if (root.isDate()) {
448
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("DateRoot"));
449
- fsmParse.push(currentFsmParse);
450
- }
451
- if (root.isPercent()) {
452
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PercentRoot"));
453
- fsmParse.push(currentFsmParse);
454
- }
455
- if (root.isRange()) {
456
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("RangeRoot"));
457
- fsmParse.push(currentFsmParse);
458
- }
459
- if (root.isTime()) {
460
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("TimeRoot"));
461
- fsmParse.push(currentFsmParse);
462
- }
463
- if (root.isOrdinal()) {
464
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("OrdinalRoot"));
465
- fsmParse.push(currentFsmParse);
448
+ if (root.isInterjection()) {
449
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("InterjectionRoot"));
450
+ fsmParse.push(currentFsmParse);
451
+ }
452
+ if (root.isDuplicate()) {
453
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("DuplicateRoot"));
454
+ fsmParse.push(currentFsmParse);
455
+ }
456
+ if (root.isCode()) {
457
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("CodeRoot"));
458
+ fsmParse.push(currentFsmParse);
459
+ }
460
+ if (root.isMetric()) {
461
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("MetricRoot"));
462
+ fsmParse.push(currentFsmParse);
463
+ }
464
+ if (root.isNumeral()) {
465
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("CardinalRoot"));
466
+ fsmParse.push(currentFsmParse);
467
+ }
468
+ if (root.isReal()) {
469
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("RealRoot"));
470
+ fsmParse.push(currentFsmParse);
471
+ }
472
+ if (root.isFraction()) {
473
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("FractionRoot"));
474
+ fsmParse.push(currentFsmParse);
475
+ }
476
+ if (root.isDate()) {
477
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("DateRoot"));
478
+ fsmParse.push(currentFsmParse);
479
+ }
480
+ if (root.isPercent()) {
481
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PercentRoot"));
482
+ fsmParse.push(currentFsmParse);
483
+ }
484
+ if (root.isRange()) {
485
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("RangeRoot"));
486
+ fsmParse.push(currentFsmParse);
487
+ }
488
+ if (root.isTime()) {
489
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("TimeRoot"));
490
+ fsmParse.push(currentFsmParse);
491
+ }
492
+ if (root.isOrdinal()) {
493
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("OrdinalRoot"));
494
+ fsmParse.push(currentFsmParse);
495
+ }
496
+ if (root.isVerb() || root.isPassive()) {
497
+ if (root.verbType() != "") {
498
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("VerbalRoot(" + root.verbType() + ")"));
466
499
  }
467
- if (root.isVerb() || root.isPassive()) {
468
- if (root.verbType() != "") {
469
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("VerbalRoot(" + root.verbType() + ")"));
500
+ else {
501
+ if (!root.isPassive()) {
502
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("VerbalRoot"));
470
503
  }
471
504
  else {
472
- if (!root.isPassive()) {
473
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("VerbalRoot"));
474
- }
475
- else {
476
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PassiveHn"));
477
- }
478
- }
479
- fsmParse.push(currentFsmParse);
480
- }
481
- if (root.isPronoun()) {
482
- if (root.getName() == "kendi") {
483
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(REFLEX)"));
484
- fsmParse.push(currentFsmParse);
485
- }
486
- if (root.getName() == "öbür" || root.getName() == "öteki" || root.getName() == "hep" || root.getName() == "kimse" || root.getName() == "diğeri" || root.getName() == "hiçbiri" || root.getName() == "böylesi" || root.getName() == "birbiri" || root.getName() == "birbirleri" || root.getName() == "biri" || root.getName() == "başkası" || root.getName() == "bazı" || root.getName() == "kimi") {
487
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(QUANT)"));
488
- fsmParse.push(currentFsmParse);
505
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PassiveHn"));
489
506
  }
490
- if (root.getName() == "tümü" || root.getName() == "topu" || root.getName() == "herkes" || root.getName() == "cümlesi" || root.getName() == "çoğu" || root.getName() == "birçoğu" || root.getName() == "birkaçı" || root.getName() == "birçokları" || root.getName() == "hepsi") {
491
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(QUANTPLURAL)"));
492
- fsmParse.push(currentFsmParse);
493
- }
494
- if (root.getName() == "o" || root.getName() == "bu" || root.getName() == "şu") {
495
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(DEMONS)"));
496
- fsmParse.push(currentFsmParse);
497
- }
498
- if (root.getName() == "ben" || root.getName() == "sen" || root.getName() == "o" || root.getName() == "biz" || root.getName() == "siz" || root.getName() == "onlar") {
499
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(PERS)"));
500
- fsmParse.push(currentFsmParse);
501
- }
502
- if (root.getName() == "nere" || root.getName() == "ne" || root.getName() == "kaçı" || root.getName() == "kim" || root.getName() == "hangi") {
503
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(QUES)"));
504
- fsmParse.push(currentFsmParse);
505
- }
506
- }
507
- if (root.isAdjective()) {
508
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("AdjectiveRoot"));
509
- fsmParse.push(currentFsmParse);
510
507
  }
511
- if (root.isPureAdjective()) {
512
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("Adjective"));
513
- fsmParse.push(currentFsmParse);
514
- }
515
- if (root.isNominal()) {
516
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRoot"));
517
- fsmParse.push(currentFsmParse);
518
- }
519
- if (root.isAbbreviation()) {
520
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRoot"));
521
- fsmParse.push(currentFsmParse);
522
- }
523
- if (root.isProperNoun() && isProper) {
524
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("ProperRoot"));
508
+ fsmParse.push(currentFsmParse);
509
+ }
510
+ if (root.isPronoun()) {
511
+ if (root.getName() == "kendi") {
512
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(REFLEX)"));
525
513
  fsmParse.push(currentFsmParse);
526
514
  }
527
- if (root.isQuestion()) {
528
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("QuestionRoot"));
515
+ if (root.getName() == "öbür" || root.getName() == "öteki" || root.getName() == "hep" || root.getName() == "kimse" || root.getName() == "diğeri" || root.getName() == "hiçbiri" || root.getName() == "böylesi" || root.getName() == "birbiri" || root.getName() == "birbirleri" || root.getName() == "biri" || root.getName() == "başkası" || root.getName() == "bazı" || root.getName() == "kimi") {
516
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(QUANT)"));
529
517
  fsmParse.push(currentFsmParse);
530
518
  }
531
- if (root.isDeterminer()) {
532
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("DeterminerRoot"));
519
+ if (root.getName() == "tümü" || root.getName() == "topu" || root.getName() == "herkes" || root.getName() == "cümlesi" || root.getName() == "çoğu" || root.getName() == "birçoğu" || root.getName() == "birkaçı" || root.getName() == "birçokları" || root.getName() == "hepsi") {
520
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(QUANTPLURAL)"));
533
521
  fsmParse.push(currentFsmParse);
534
522
  }
535
- if (root.isConjunction()) {
536
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("ConjunctionRoot"));
523
+ if (root.getName() == "o" || root.getName() == "bu" || root.getName() == "şu") {
524
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(DEMONS)"));
537
525
  fsmParse.push(currentFsmParse);
538
526
  }
539
- if (root.isPostP()) {
540
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PostP"));
527
+ if (root.getName() == "ben" || root.getName() == "sen" || root.getName() == "o" || root.getName() == "biz" || root.getName() == "siz" || root.getName() == "onlar") {
528
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(PERS)"));
541
529
  fsmParse.push(currentFsmParse);
542
530
  }
543
- if (root.isAdverb()) {
544
- currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("AdverbRoot"));
531
+ if (root.getName() == "nere" || root.getName() == "ne" || root.getName() == "kaçı" || root.getName() == "kim" || root.getName() == "hangi") {
532
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PronounRoot(QUES)"));
545
533
  fsmParse.push(currentFsmParse);
546
534
  }
547
535
  }
536
+ if (root.isAdjective()) {
537
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("AdjectiveRoot"));
538
+ fsmParse.push(currentFsmParse);
539
+ }
540
+ if (root.isPureAdjective()) {
541
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("Adjective"));
542
+ fsmParse.push(currentFsmParse);
543
+ }
544
+ if (root.isNominal()) {
545
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRoot"));
546
+ fsmParse.push(currentFsmParse);
547
+ }
548
+ if (root.isAbbreviation()) {
549
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("NominalRoot"));
550
+ fsmParse.push(currentFsmParse);
551
+ }
552
+ if (root.isProperNoun() && isProper) {
553
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("ProperRoot"));
554
+ fsmParse.push(currentFsmParse);
555
+ }
556
+ if (root.isQuestion()) {
557
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("QuestionRoot"));
558
+ fsmParse.push(currentFsmParse);
559
+ }
560
+ if (root.isDeterminer()) {
561
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("DeterminerRoot"));
562
+ fsmParse.push(currentFsmParse);
563
+ }
564
+ if (root.isConjunction()) {
565
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("ConjunctionRoot"));
566
+ fsmParse.push(currentFsmParse);
567
+ }
568
+ if (root.isPostP()) {
569
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("PostP"));
570
+ fsmParse.push(currentFsmParse);
571
+ }
572
+ if (root.isAdverb()) {
573
+ currentFsmParse = new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState("AdverbRoot"));
574
+ fsmParse.push(currentFsmParse);
575
+ }
548
576
  }
549
577
  }
550
578
  }
551
- /**
552
- * The initializeParseListFromRoot method is used to create an {@link Array} which consists of initial fsm parsings.
553
- * First, traverses this HashSet and uses each word as a root and calls initializeParseList method with this root
554
- * and Array.
555
- * <p>
556
- *
557
- * @param parseList ArrayList to initialize.
558
- * @param root the root form to generate initial parse list.
559
- * @param isProper is used to check a word is proper or not.
560
- */
561
- initializeParseListFromRoot(parseList, root, isProper) {
562
- this.initializeParseList(parseList, root, isProper);
563
- if (root.obeysAndNotObeysVowelHarmonyDuringAgglutination()) {
564
- let newRoot = root.clone();
565
- newRoot.removeFlag("IS_UU");
566
- newRoot.removeFlag("IS_UUU");
567
- this.initializeParseList(parseList, newRoot, isProper);
568
- }
569
- if (root.rootSoftenAndNotSoftenDuringSuffixation()) {
570
- let newRoot = root.clone();
571
- newRoot.removeFlag("IS_SD");
572
- newRoot.removeFlag("IS_SDD");
573
- this.initializeParseList(parseList, newRoot, isProper);
574
- }
575
- if (root.lastIDropsAndNotDropDuringSuffixation()) {
576
- let newRoot = root.clone();
577
- newRoot.removeFlag("IS_UD");
578
- newRoot.removeFlag("IS_UDD");
579
- this.initializeParseList(parseList, newRoot, isProper);
580
- }
581
- if (root.duplicatesAndNotDuplicatesDuringSuffixation()) {
582
- let newRoot = root.clone();
583
- newRoot.removeFlag("IS_ST");
584
- newRoot.removeFlag("IS_STT");
585
- this.initializeParseList(parseList, newRoot, isProper);
586
- }
587
- if (root.endingKChangesIntoG() && root.containsFlag("IS_OA")) {
588
- let newRoot = root.clone();
589
- newRoot.removeFlag("IS_OA");
590
- this.initializeParseList(parseList, newRoot, isProper);
591
- }
579
+ }
580
+ /**
581
+ * The initializeParseListFromRoot method is used to create an {@link Array} which consists of initial fsm parsings.
582
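+ * First, calls initializeParseList with the given root and Array. Then, for each alternation check that holds for the root
583
+ * (e.g. obeysAndNotObeysVowelHarmonyDuringAgglutination), clones the root, removes the related flags and calls initializeParseList again with the clone.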
584
+ * <p>
585
+ *
586
+ * @param parseList ArrayList to initialize.
587
+ * @param root the root form to generate initial parse list.
588
+ * @param isProper is used to check a word is proper or not.
589
+ */
590
+ initializeParseListFromRoot(parseList, root, isProper) {
591
+ this.initializeParseList(parseList, root, isProper);
592
+ if (root.obeysAndNotObeysVowelHarmonyDuringAgglutination()) {
593
+ let newRoot = root.clone();
594
+ newRoot.removeFlag("IS_UU");
595
+ newRoot.removeFlag("IS_UUU");
596
+ this.initializeParseList(parseList, newRoot, isProper);
597
+ }
598
+ if (root.rootSoftenAndNotSoftenDuringSuffixation()) {
599
+ let newRoot = root.clone();
600
+ newRoot.removeFlag("IS_SD");
601
+ newRoot.removeFlag("IS_SDD");
602
+ this.initializeParseList(parseList, newRoot, isProper);
603
+ }
604
+ if (root.lastIDropsAndNotDropDuringSuffixation()) {
605
+ let newRoot = root.clone();
606
+ newRoot.removeFlag("IS_UD");
607
+ newRoot.removeFlag("IS_UDD");
608
+ this.initializeParseList(parseList, newRoot, isProper);
609
+ }
610
+ if (root.duplicatesAndNotDuplicatesDuringSuffixation()) {
611
+ let newRoot = root.clone();
612
+ newRoot.removeFlag("IS_ST");
613
+ newRoot.removeFlag("IS_STT");
614
+ this.initializeParseList(parseList, newRoot, isProper);
615
+ }
616
+ if (root.endingKChangesIntoG() && root.containsFlag("IS_OA")) {
617
+ let newRoot = root.clone();
618
+ newRoot.removeFlag("IS_OA");
619
+ this.initializeParseList(parseList, newRoot, isProper);
592
620
  }
593
- /**
594
- * The initializeParseListFromSurfaceForm method is used to create an {@link Array} which consists of initial fsm parsings. First,
595
- * it calls getWordsWithPrefix methods by using input String surfaceForm and generates a {@link Set}. Then, traverses
596
- * this HashSet and uses each word as a root and calls initializeParseListFromRoot method with this root and ArrayList.
597
- * <p>
598
- *
599
- * @param surfaceForm the String used to generate a HashSet of words.
600
- * @param isProper is used to check a word is proper or not.
601
- * @return initialFsmParse ArrayList.
602
- */
603
- initializeParseListFromSurfaceForm(surfaceForm, isProper) {
604
- let initialFsmParse = new Array();
605
- if (surfaceForm.length == 0) {
606
- return initialFsmParse;
621
+ }
622
+ /**
623
+ * The initializeParseListFromSurfaceForm method is used to create an {@link Array} which consists of initial fsm parsings. First,
624
+ * it calls getWordsWithPrefix methods by using input String surfaceForm and generates a {@link Set}. Then, traverses
625
+ * this HashSet and uses each word as a root and calls initializeParseListFromRoot method with this root and ArrayList.
626
+ * <p>
627
+ *
628
+ * @param surfaceForm the String used to generate a HashSet of words.
629
+ * @param isProper is used to check a word is proper or not.
630
+ * @return initialFsmParse ArrayList.
631
+ */
632
+ initializeParseListFromSurfaceForm(surfaceForm, isProper) {
633
+ let initialFsmParse = new Array();
634
+ if (surfaceForm.length == 0) {
635
+ return initialFsmParse;
636
+ }
637
+ let words = this.dictionaryTrie.getWordsWithPrefix(surfaceForm);
638
+ for (let word of words) {
639
+ let root = word;
640
+ this.initializeParseListFromRoot(initialFsmParse, root, isProper);
641
+ }
642
+ return initialFsmParse;
643
+ }
644
+ /**
645
+ * The addNewParsesFromCurrentParseLength method initially gets the final suffix from input currentFsmParse called as currentState,
646
+ * and by using the currentState information it gets the new analysis. Then loops through each currentState's transition.
647
+ * If the currentTransition is possible, it makes the transition.
648
+ *
649
+ * @param currentFsmParse FsmParse type input.
650
+ * @param fsmParse queue of FsmParse to which the newly created parses are enqueued.
651
+ * @param maxLength Maximum length of the parse.
652
+ * @param root TxtWord used to make transition.
653
+ */
654
+ addNewParsesFromCurrentParseLength(currentFsmParse, fsmParse, maxLength, root) {
655
+ let currentState = currentFsmParse.getFinalSuffix();
656
+ let currentSurfaceForm = currentFsmParse.getSurfaceForm();
657
+ for (let currentTransition of this.finiteStateMachine.getTransitions(currentState)) {
658
+ if (currentTransition.transitionPossibleFromParse(currentFsmParse) && (currentSurfaceForm != root.getName() ||
659
+ (currentSurfaceForm == root.getName() && currentTransition.transitionPossibleFromRoot(root, currentState)))) {
660
+ let tmp = currentTransition.makeTransition(root, currentSurfaceForm, currentFsmParse.getStartState());
661
+ if (tmp.length <= maxLength) {
662
+ let newFsmParse = currentFsmParse.clone();
663
+ newFsmParse.addSuffix(currentTransition.toState(), tmp, currentTransition.getWith(), currentTransition.toString(), currentTransition.toPos());
664
+ newFsmParse.setAgreement(currentTransition.getWith());
665
+ fsmParse.enqueue(newFsmParse);
666
+ }
607
667
  }
608
- let words = this.dictionaryTrie.getWordsWithPrefix(surfaceForm);
609
- for (let word of words) {
610
- let root = word;
611
- this.initializeParseListFromRoot(initialFsmParse, root, isProper);
668
+ }
669
+ }
670
+ /**
671
+ * The addNewParsesFromCurrentParseSurfaceForm method initially gets the final suffix from input currentFsmParse called as currentState,
672
+ * and by using the currentState information it gets the currentSurfaceForm. Then loops through each currentState's transition.
673
+ * If the currentTransition is possible, it makes the transition
674
+ *
675
+ * @param currentFsmParse FsmParse type input.
676
+ * @param fsmParse Queue of FsmParse objects; each newly built parse is enqueued to it.
677
+ * @param surfaceForm String to use during transition.
678
+ * @param root TxtWord used to make transition.
679
+ */
680
+ addNewParsesFromCurrentParseSurfaceForm(currentFsmParse, fsmParse, surfaceForm, root) {
681
+ let currentState = currentFsmParse.getFinalSuffix();
682
+ let currentSurfaceForm = currentFsmParse.getSurfaceForm();
683
+ for (let currentTransition of this.finiteStateMachine.getTransitions(currentState)) {
684
+ if (currentTransition.transitionPossible(currentFsmParse.getSurfaceForm(), surfaceForm) && currentTransition.transitionPossibleFromParse(currentFsmParse) && (currentSurfaceForm != root.getName() || (currentSurfaceForm == root.getName() && currentTransition.transitionPossibleFromRoot(root, currentState)))) {
685
+ let tmp = currentTransition.makeTransition(root, currentSurfaceForm, currentFsmParse.getStartState());
686
+ if ((tmp.length < surfaceForm.length && this.isPossibleSubstring(tmp, surfaceForm, root)) || (tmp.length == surfaceForm.length && (root.lastIdropsDuringSuffixation() || (tmp == surfaceForm)))) {
687
+ let newFsmParse = currentFsmParse.clone();
688
+ newFsmParse.addSuffix(currentTransition.toState(), tmp, currentTransition.getWith(), currentTransition.toString(), currentTransition.toPos());
689
+ newFsmParse.setAgreement(currentTransition.getWith());
690
+ fsmParse.enqueue(newFsmParse);
691
+ }
612
692
  }
613
- return initialFsmParse;
614
693
  }
615
- /**
616
- * The addNewParsesFromCurrentParse method initially gets the final suffixes from input currentFsmParse called as currentState,
617
- * and by using the currentState information it gets the new analysis. Then loops through each currentState's transition.
618
- * If the currentTransition is possible, it makes the transition.
619
- *
620
- * @param currentFsmParse FsmParse type input.
621
- * @param fsmParse an ArrayList of FsmParse.
622
- * @param maxLength Maximum length of the parse.
623
- * @param root TxtWord used to make transition.
624
- */
625
- addNewParsesFromCurrentParseLength(currentFsmParse, fsmParse, maxLength, root) {
694
+ }
695
+ /**
696
+ * The parseExists method is used to check the existence of the parse.
697
+ *
698
+ * @param fsmParse an ArrayList of FsmParse
699
+ * @param surfaceForm String to use during transition.
700
+ * @return true when the currentState is end state and input surfaceForm is equal to currentSurfaceForm, otherwise false.
701
+ */
702
+ parseExists(fsmParse, surfaceForm) {
703
+ let parseQueue = new Queue_1.Queue(1000);
704
+ parseQueue.enqueueAll(fsmParse);
705
+ while (!parseQueue.isEmpty()) {
706
+ let currentFsmParse = parseQueue.peek();
707
+ parseQueue.dequeue();
708
+ let root = currentFsmParse.getWord();
626
709
  let currentState = currentFsmParse.getFinalSuffix();
627
710
  let currentSurfaceForm = currentFsmParse.getSurfaceForm();
628
- for (let currentTransition of this.finiteStateMachine.getTransitions(currentState)) {
629
- if (currentTransition.transitionPossibleFromParse(currentFsmParse) && (currentSurfaceForm != root.getName() ||
630
- (currentSurfaceForm == root.getName() && currentTransition.transitionPossibleFromRoot(root, currentState)))) {
631
- let tmp = currentTransition.makeTransition(root, currentSurfaceForm, currentFsmParse.getStartState());
632
- if (tmp.length <= maxLength) {
633
- let newFsmParse = currentFsmParse.clone();
634
- newFsmParse.addSuffix(currentTransition.toState(), tmp, currentTransition.getWith(), currentTransition.toString(), currentTransition.toPos());
635
- newFsmParse.setAgreement(currentTransition.getWith());
636
- fsmParse.enqueue(newFsmParse);
637
- }
638
- }
711
+ if (currentState.isEndState() && currentSurfaceForm == surfaceForm) {
712
+ return true;
639
713
  }
714
+ this.addNewParsesFromCurrentParseSurfaceForm(currentFsmParse, parseQueue, surfaceForm, root);
640
715
  }
641
- /**
642
- * The addNewParsesFromCurrentParse method initially gets the final suffixes from input currentFsmParse called as currentState,
643
- * and by using the currentState information it gets the currentSurfaceForm. Then loops through each currentState's transition.
644
- * If the currentTransition is possible, it makes the transition
645
- *
646
- * @param currentFsmParse FsmParse type input.
647
- * @param fsmParse an ArrayList of FsmParse.
648
- * @param surfaceForm String to use during transition.
649
- * @param root TxtWord used to make transition.
650
- */
651
- addNewParsesFromCurrentParseSurfaceForm(currentFsmParse, fsmParse, surfaceForm, root) {
716
+ return false;
717
+ }
718
+ /**
719
+ * The parseWordLength method is used to parse a given fsmParse. It repeatedly adds new parses to the parse queue by
720
+ * using the addNewParsesFromCurrentParseLength method and collects the distinct parses that end in an end state.
721
+ *
722
+ * @param fsmParse an ArrayList of FsmParse
723
+ * @param maxLength maximum length of the surfaceform.
724
+ * @return result {@link Array} which has the currentFsmParse.
725
+ */
726
+ parseWordLength(fsmParse, maxLength) {
727
+ let result = new Array();
728
+ let resultTransitionList = new Array();
729
+ let parseQueue = new Queue_1.Queue(1000);
730
+ parseQueue.enqueueAll(fsmParse);
731
+ while (!parseQueue.isEmpty()) {
732
+ let currentFsmParse = parseQueue.peek();
733
+ parseQueue.dequeue();
734
+ let root = currentFsmParse.getWord();
652
735
  let currentState = currentFsmParse.getFinalSuffix();
653
736
  let currentSurfaceForm = currentFsmParse.getSurfaceForm();
654
- for (let currentTransition of this.finiteStateMachine.getTransitions(currentState)) {
655
- if (currentTransition.transitionPossible(currentFsmParse.getSurfaceForm(), surfaceForm) && currentTransition.transitionPossibleFromParse(currentFsmParse) && (currentSurfaceForm != root.getName() || (currentSurfaceForm == root.getName() && currentTransition.transitionPossibleFromRoot(root, currentState)))) {
656
- let tmp = currentTransition.makeTransition(root, currentSurfaceForm, currentFsmParse.getStartState());
657
- if ((tmp.length < surfaceForm.length && this.isPossibleSubstring(tmp, surfaceForm, root)) || (tmp.length == surfaceForm.length && (root.lastIdropsDuringSuffixation() || (tmp == surfaceForm)))) {
658
- let newFsmParse = currentFsmParse.clone();
659
- newFsmParse.addSuffix(currentTransition.toState(), tmp, currentTransition.getWith(), currentTransition.toString(), currentTransition.toPos());
660
- newFsmParse.setAgreement(currentTransition.getWith());
661
- fsmParse.enqueue(newFsmParse);
662
- }
737
+ if (currentState.isEndState() && currentSurfaceForm.length <= maxLength) {
738
+ let currentTransitionList = currentSurfaceForm + " " + currentFsmParse.getFsmParseTransitionList();
739
+ if (!resultTransitionList.includes(currentTransitionList)) {
740
+ result.push(currentFsmParse);
741
+ currentFsmParse.constructInflectionalGroups();
742
+ resultTransitionList.push(currentTransitionList);
663
743
  }
664
744
  }
745
+ this.addNewParsesFromCurrentParseLength(currentFsmParse, parseQueue, maxLength, root);
665
746
  }
666
- /**
667
- * The parseExists method is used to check the existence of the parse.
668
- *
669
- * @param fsmParse an ArrayList of FsmParse
670
- * @param surfaceForm String to use during transition.
671
- * @return true when the currentState is end state and input surfaceForm id equal to currentSurfaceForm, otherwise false.
672
- */
673
- parseExists(fsmParse, surfaceForm) {
674
- let parseQueue = new Queue_1.Queue(1000);
675
- parseQueue.enqueueAll(fsmParse);
676
- while (!parseQueue.isEmpty()) {
677
- let currentFsmParse = parseQueue.peek();
678
- parseQueue.dequeue();
679
- let root = currentFsmParse.getWord();
680
- let currentState = currentFsmParse.getFinalSuffix();
681
- let currentSurfaceForm = currentFsmParse.getSurfaceForm();
682
- if (currentState.isEndState() && currentSurfaceForm == surfaceForm) {
683
- return true;
747
+ return result;
748
+ }
749
+ /**
750
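+ * The parseWordSurfaceForm method is used to parse a given fsmParse. It repeatedly adds new parses to the parse queue by
751
+ * using the addNewParsesFromCurrentParseSurfaceForm method and collects the distinct end-state parses whose surface form equals surfaceForm.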
752
+ *
753
+ * @param fsmParse an ArrayList of FsmParse
754
+ * @param surfaceForm String to use during transition.
755
+ * @return result {@link Array} which has the currentFsmParse.
756
+ */
757
+ parseWordSurfaceForm(fsmParse, surfaceForm) {
758
+ let result = new Array();
759
+ let resultTransitionList = new Array();
760
+ let parseQueue = new Queue_1.Queue(1000);
761
+ parseQueue.enqueueAll(fsmParse);
762
+ while (!parseQueue.isEmpty()) {
763
+ let currentFsmParse = parseQueue.peek();
764
+ parseQueue.dequeue();
765
+ let root = currentFsmParse.getWord();
766
+ let currentState = currentFsmParse.getFinalSuffix();
767
+ let currentSurfaceForm = currentFsmParse.getSurfaceForm();
768
+ if (currentState.isEndState() && currentSurfaceForm == surfaceForm) {
769
+ let currentTransitionList = currentFsmParse.getFsmParseTransitionList();
770
+ if (!resultTransitionList.includes(currentTransitionList)) {
771
+ result.push(currentFsmParse);
772
+ currentFsmParse.constructInflectionalGroups();
773
+ resultTransitionList.push(currentTransitionList);
684
774
  }
685
- this.addNewParsesFromCurrentParseSurfaceForm(currentFsmParse, parseQueue, surfaceForm, root);
686
775
  }
687
- return false;
776
+ this.addNewParsesFromCurrentParseSurfaceForm(currentFsmParse, parseQueue, surfaceForm, root);
688
777
  }
689
- /**
690
- * The parseWord method is used to parse a given fsmParse. It simply adds new parses to the current parse by
691
- * using addNewParsesFromCurrentParse method.
692
- *
693
- * @param fsmParse an ArrayList of FsmParse
694
- * @param maxLength maximum length of the surfaceform.
695
- * @return result {@link Array} which has the currentFsmParse.
696
- */
697
- parseWordLength(fsmParse, maxLength) {
698
- let result = new Array();
699
- let resultTransitionList = new Array();
700
- let parseQueue = new Queue_1.Queue(1000);
701
- parseQueue.enqueueAll(fsmParse);
702
- while (!parseQueue.isEmpty()) {
703
- let currentFsmParse = parseQueue.peek();
704
- parseQueue.dequeue();
705
- let root = currentFsmParse.getWord();
706
- let currentState = currentFsmParse.getFinalSuffix();
707
- let currentSurfaceForm = currentFsmParse.getSurfaceForm();
708
- if (currentState.isEndState() && currentSurfaceForm.length <= maxLength) {
709
- let currentTransitionList = currentSurfaceForm + " " + currentFsmParse.getFsmParseTransitionList();
710
- if (!resultTransitionList.includes(currentTransitionList)) {
711
- result.push(currentFsmParse);
712
- currentFsmParse.constructInflectionalGroups();
713
- resultTransitionList.push(currentTransitionList);
714
- }
715
- }
716
- this.addNewParsesFromCurrentParseLength(currentFsmParse, parseQueue, maxLength, root);
717
- }
718
- return result;
778
+ return result;
779
+ }
780
+ /**
781
+ * The morphologicalAnalysisFromRoot method initializes an {@link Array} of FsmParse: when state is given it adds a single new FsmParse
782
+ * with given root and state, otherwise it fills the list through initializeParseListFromRoot.
783
+ *
784
+ * @param root TxtWord input.
785
+ * @param surfaceForm String input to use for parsing.
786
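+ * @param state String input passed to finiteStateMachine.getState; when it is not given, the parse list is filled by initializeParseListFromRoot instead.
787
+ * @return result of the parseWordSurfaceForm method for the newly populated FsmParse list and input surfaceForm.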
788
+ */
789
+ morphologicalAnalysisFromRoot(root, surfaceForm, state) {
790
+ let initialFsmParse = new Array();
791
+ if (state != undefined) {
792
+ initialFsmParse.push(new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState(state)));
793
+ return this.parseWordSurfaceForm(initialFsmParse, surfaceForm);
719
794
  }
720
- /**
721
- * The parseWord method is used to parse a given fsmParse. It simply adds new parses to the current parse by
722
- * using addNewParsesFromCurrentParse method.
723
- *
724
- * @param fsmParse an ArrayList of FsmParse
725
- * @param surfaceForm String to use during transition.
726
- * @return result {@link Array} which has the currentFsmParse.
727
- */
728
- parseWordSurfaceForm(fsmParse, surfaceForm) {
729
- let result = new Array();
730
- let resultTransitionList = new Array();
731
- let parseQueue = new Queue_1.Queue(1000);
732
- parseQueue.enqueueAll(fsmParse);
733
- while (!parseQueue.isEmpty()) {
734
- let currentFsmParse = parseQueue.peek();
735
- parseQueue.dequeue();
736
- let root = currentFsmParse.getWord();
737
- let currentState = currentFsmParse.getFinalSuffix();
738
- let currentSurfaceForm = currentFsmParse.getSurfaceForm();
739
- if (currentState.isEndState() && currentSurfaceForm == surfaceForm) {
740
- let currentTransitionList = currentFsmParse.getFsmParseTransitionList();
741
- if (!resultTransitionList.includes(currentTransitionList)) {
742
- result.push(currentFsmParse);
743
- currentFsmParse.constructInflectionalGroups();
744
- resultTransitionList.push(currentTransitionList);
745
- }
746
- }
747
- this.addNewParsesFromCurrentParseSurfaceForm(currentFsmParse, parseQueue, surfaceForm, root);
748
- }
749
- return result;
795
+ else {
796
+ this.initializeParseListFromRoot(initialFsmParse, root, this.isProperNoun(surfaceForm));
797
+ return this.parseWordSurfaceForm(initialFsmParse, surfaceForm);
750
798
  }
751
- /**
752
- * The morphologicalAnalysis with 3 inputs is used to initialize an {@link Array} and add a new FsmParse
753
- * with given root and state.
754
- *
755
- * @param root TxtWord input.
756
- * @param surfaceForm String input to use for parsing.
757
- * @param state String input.
758
- * @return parseWord method with newly populated FsmParse ArrayList and input surfaceForm.
759
- */
760
- morphologicalAnalysisFromRoot(root, surfaceForm, state) {
761
- let initialFsmParse = new Array();
762
- if (state != undefined) {
763
- initialFsmParse.push(new FsmParse_1.FsmParse(root, this.finiteStateMachine.getState(state)));
764
- return this.parseWordSurfaceForm(initialFsmParse, surfaceForm);
765
- }
766
- else {
767
- this.initializeParseListFromRoot(initialFsmParse, root, this.isProperNoun(surfaceForm));
768
- return this.parseWordSurfaceForm(initialFsmParse, surfaceForm);
769
- }
799
+ }
800
+ distinctSurfaceFormList(parseList) {
801
+ let items = new Set();
802
+ for (let parse of parseList) {
803
+ items.add(parse.getSurfaceForm());
770
804
  }
771
- distinctSurfaceFormList(parseList) {
772
- let items = new Set();
773
- for (let parse of parseList) {
774
- items.add(parse.getSurfaceForm());
775
- }
776
- return items;
777
- }
778
- /**
779
- * The generateAllParses with 2 inputs is used to generate all parses with given root. Then it calls initializeParseListFromRoot method to initialize list with newly created ArrayList, input root,
780
- * and maximum length.
781
- *
782
- * @param root TxtWord input.
783
- * @param maxLength Maximum length of the surface form.
784
- * @return parseWord method with newly populated FsmParse ArrayList and maximum length.
785
- */
786
- generateAllParses(root, maxLength) {
787
- let initialFsmParse = new Array();
788
- if (root.isProperNoun()) {
789
- this.initializeParseListFromRoot(initialFsmParse, root, true);
790
- }
791
- this.initializeParseListFromRoot(initialFsmParse, root, false);
792
- return this.parseWordLength(initialFsmParse, maxLength);
793
- }
794
- /**
795
- * Replaces previous lemma in the sentence with the new lemma. Both lemma can contain multiple words.
796
- * @param original Original sentence to be replaced with.
797
- * @param previousWord Root word in the original sentence
798
- * @param newWord New word to be replaced.
799
- * @return Newly generated sentence by replacing the previous word in the original sentence with the new word.
800
- */
801
- replaceWord(original, previousWord, newWord) {
802
- let previousWordSplitted = undefined, newWordSplitted = undefined;
803
- let result = new Sentence_1.Sentence();
804
- let replacedWord = undefined;
805
- let previousWordMultiple = previousWord.includes(" ");
806
- let newWordMultiple = newWord.includes(" ");
807
- let lastWord;
808
- if (previousWordMultiple) {
809
- previousWordSplitted = previousWord.split(" ");
810
- lastWord = previousWordSplitted[previousWordSplitted.length - 1];
811
- }
812
- else {
813
- lastWord = previousWord;
814
- }
815
- let newRootWord;
816
- if (newWordMultiple) {
817
- newWordSplitted = newWord.split(" ");
818
- newRootWord = newWordSplitted[newWordSplitted.length - 1];
819
- }
820
- else {
821
- newRootWord = newWord;
805
+ return items;
806
+ }
807
+ /**
808
+ * Generates all parses of the given root whose surface form is at most maxLength characters long. The initial parse list is seeded through initializeParseListFromRoot, once with the proper flag set when root.isProperNoun() holds and always once with the flag cleared,
809
+ * and is then expanded by parseWordLength.
810
+ *
811
+ * @param root TxtWord input.
812
+ * @param maxLength Maximum length of the surface form.
813
+ * @return result of parseWordLength for the seeded FsmParse list and maxLength.
814
+ */
815
+ generateAllParses(root, maxLength) {
816
+ let initialFsmParse = new Array();
817
+ if (root.isProperNoun()) { // proper-noun roots are additionally seeded with the proper flag set
818
+ this.initializeParseListFromRoot(initialFsmParse, root, true);
819
+ }
820
+ this.initializeParseListFromRoot(initialFsmParse, root, false); // every root is seeded with the flag cleared
821
+ return this.parseWordLength(initialFsmParse, maxLength);
822
+ }
823
+ /**
824
+ * Replaces previous lemma in the sentence with the new lemma. Both lemma can contain multiple words.
825
+ * @param original Original sentence in which the replacement is made.
826
+ * @param previousWord Root word in the original sentence
827
+ * @param newWord New word to be replaced.
828
+ * @return Newly generated sentence by replacing the previous word in the original sentence with the new word.
829
+ */
830
+ replaceWord(original, previousWord, newWord) {
831
+ let previousWordSplitted = undefined, newWordSplitted = undefined;
832
+ let result = new Sentence_1.Sentence();
833
+ let replacedWord = undefined;
834
+ let previousWordMultiple = previousWord.includes(" ");
835
+ let newWordMultiple = newWord.includes(" ");
836
+ let lastWord;
837
+ if (previousWordMultiple) {
838
+ previousWordSplitted = previousWord.split(" ");
839
+ lastWord = previousWordSplitted[previousWordSplitted.length - 1];
840
+ }
841
+ else {
842
+ lastWord = previousWord;
843
+ }
844
+ let newRootWord;
845
+ if (newWordMultiple) {
846
+ newWordSplitted = newWord.split(" ");
847
+ newRootWord = newWordSplitted[newWordSplitted.length - 1];
848
+ }
849
+ else {
850
+ newRootWord = newWord;
851
+ }
852
+ let newRootTxtWord = this.dictionary.getWord(newRootWord);
853
+ let parseList = this.morphologicalAnalysisFromSentence(original);
854
+ let i;
855
+ for (i = 0; i < parseList.length; i++) {
856
+ let replaced = false;
857
+ for (let j = 0; j < parseList[i].size(); j++) {
858
+ if (parseList[i].getFsmParse(j).getWord().getName() == lastWord && newRootTxtWord != undefined) {
859
+ replaced = true;
860
+ replacedWord = parseList[i].getFsmParse(j).replaceRootWord(newRootTxtWord);
861
+ }
822
862
  }
823
- let newRootTxtWord = this.dictionary.getWord(newRootWord);
824
- let parseList = this.morphologicalAnalysisFromSentence(original);
825
- let i;
826
- for (i = 0; i < parseList.length; i++) {
827
- let replaced = false;
828
- for (let j = 0; j < parseList[i].size(); j++) {
829
- if (parseList[i].getFsmParse(j).getWord().getName() == lastWord && newRootTxtWord != undefined) {
830
- replaced = true;
831
- replacedWord = parseList[i].getFsmParse(j).replaceRootWord(newRootTxtWord);
863
+ if (replaced && replacedWord != null) {
864
+ if (previousWordMultiple) {
865
+ for (let k = 0; k < i - previousWordSplitted.length + 1; k++) {
866
+ result.addWord(original.getWord(k));
832
867
  }
833
868
  }
834
- if (replaced && replacedWord != null) {
835
- if (previousWordMultiple) {
836
- for (let k = 0; k < i - previousWordSplitted.length + 1; k++) {
837
- result.addWord(original.getWord(k));
869
+ if (newWordMultiple) {
870
+ for (let k = 0; k < newWordSplitted.length - 1; k++) {
871
+ if (result.wordCount() == 0) {
872
+ result.addWord(new Word_1.Word((newWordSplitted[k].charAt(0) + "").toLocaleUpperCase("tr") + newWordSplitted[k].substring(1)));
838
873
  }
839
- }
840
- if (newWordMultiple) {
841
- for (let k = 0; k < newWordSplitted.length - 1; k++) {
842
- if (result.wordCount() == 0) {
843
- result.addWord(new Word_1.Word((newWordSplitted[k].charAt(0) + "").toLocaleUpperCase("tr") + newWordSplitted[k].substring(1)));
844
- }
845
- else {
846
- result.addWord(new Word_1.Word(newWordSplitted[k]));
847
- }
874
+ else {
875
+ result.addWord(new Word_1.Word(newWordSplitted[k]));
848
876
  }
849
877
  }
850
- if (result.wordCount() == 0) {
851
- replacedWord = (replacedWord.charAt(0) + "").toLocaleUpperCase("tr") + replacedWord.substring(1);
852
- }
853
- result.addWord(new Word_1.Word(replacedWord));
854
- if (previousWordMultiple) {
855
- i++;
856
- break;
857
- }
858
878
  }
859
- else {
860
- if (!previousWordMultiple) {
861
- result.addWord(original.getWord(i));
862
- }
879
+ if (result.wordCount() == 0) {
880
+ replacedWord = (replacedWord.charAt(0) + "").toLocaleUpperCase("tr") + replacedWord.substring(1);
881
+ }
882
+ result.addWord(new Word_1.Word(replacedWord));
883
+ if (previousWordMultiple) {
884
+ i++;
885
+ break;
863
886
  }
864
887
  }
865
- if (previousWordMultiple) {
866
- for (; i < parseList.length; i++) {
888
+ else {
889
+ if (!previousWordMultiple) {
867
890
  result.addWord(original.getWord(i));
868
891
  }
869
892
  }
870
- return result;
871
893
  }
872
- /**
873
- * The analysisExists method checks several cases. If the given surfaceForm is a punctuation or double then it
874
- * returns true. If it is not a root word, then it initializes the parse list and returns the parseExists method with
875
- * this newly initialized list and surfaceForm.
876
- *
877
- * @param rootWord TxtWord root.
878
- * @param surfaceForm String input.
879
- * @param isProper boolean variable indicates a word is proper or not.
880
- * @return true if surfaceForm is punctuation or double, otherwise returns parseExist method with given surfaceForm.
881
- */
882
- analysisExists(rootWord, surfaceForm, isProper) {
883
- if (Word_1.Word.isPunctuation(surfaceForm)) {
884
- return true;
885
- }
886
- if (this.isDouble(surfaceForm)) {
887
- return true;
888
- }
889
- let initialFsmParse;
890
- if (rootWord != null) {
891
- initialFsmParse = new Array();
892
- this.initializeParseListFromRoot(initialFsmParse, rootWord, isProper);
893
- }
894
- else {
895
- initialFsmParse = this.initializeParseListFromSurfaceForm(surfaceForm, isProper);
896
- }
897
- return this.parseExists(initialFsmParse, surfaceForm);
898
- }
899
- /**
900
- * The analysis method is used by the morphologicalAnalysis method. It gets String surfaceForm as an input and checks
901
- * its type such as punctuation, number or compares with the regex for date, fraction, percent, time, range, hashtag,
902
- * and mail or checks its variable type as integer or double. After finding the right case for given surfaceForm, it calls
903
- * constructInflectionalGroups method which creates sub-word units.
904
- *
905
- * @param surfaceForm String to analyse.
906
- * @param isProper is used to indicate the proper words.
907
- * @return ArrayList type initialFsmParse which holds the analyses.
908
- */
909
- analysis(surfaceForm, isProper) {
910
- let initialFsmParse, fsmParse;
911
- if (Word_1.Word.isPunctuation(surfaceForm) && surfaceForm != "%") {
912
- initialFsmParse = new Array();
913
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("Punctuation"), true, true));
914
- fsmParse.constructInflectionalGroups();
915
- initialFsmParse.push(fsmParse);
916
- return initialFsmParse;
917
- }
918
- if (this.isNumber(surfaceForm)) {
919
- initialFsmParse = new Array();
920
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("CardinalRoot"), true, true));
921
- fsmParse.constructInflectionalGroups();
922
- initialFsmParse.push(fsmParse);
923
- return initialFsmParse;
924
- }
925
- if (this.patternMatches("^\\d+/\\d+$", surfaceForm)) {
926
- initialFsmParse = new Array();
927
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("FractionRoot"), true, true));
928
- fsmParse.constructInflectionalGroups();
929
- initialFsmParse.push(fsmParse);
930
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("DateRoot"), true, true));
931
- fsmParse.constructInflectionalGroups();
932
- initialFsmParse.push(fsmParse);
933
- return initialFsmParse;
934
- }
935
- if (this.isDate(surfaceForm)) {
936
- initialFsmParse = new Array();
937
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("DateRoot"), true, true));
938
- fsmParse.constructInflectionalGroups();
939
- initialFsmParse.push(fsmParse);
940
- return initialFsmParse;
941
- }
942
- if (this.patternMatches("^\\d+\\\\/\\d+$", surfaceForm)) {
943
- initialFsmParse = new Array();
944
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("FractionRoot"), true, true));
945
- fsmParse.constructInflectionalGroups();
946
- initialFsmParse.push(fsmParse);
947
- return initialFsmParse;
948
- }
949
- if (surfaceForm == "%" || this.isPercent(surfaceForm)) {
950
- initialFsmParse = new Array();
951
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("PercentRoot"), true, true));
952
- fsmParse.constructInflectionalGroups();
953
- initialFsmParse.push(fsmParse);
954
- return initialFsmParse;
955
- }
956
- if (this.isTime(surfaceForm)) {
957
- initialFsmParse = new Array();
958
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("TimeRoot"), true, true));
959
- fsmParse.constructInflectionalGroups();
960
- initialFsmParse.push(fsmParse);
961
- return initialFsmParse;
962
- }
963
- if (this.isRange(surfaceForm)) {
964
- initialFsmParse = new Array();
965
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("RangeRoot"), true, true));
966
- fsmParse.constructInflectionalGroups();
967
- initialFsmParse.push(fsmParse);
968
- return initialFsmParse;
969
- }
970
- if (surfaceForm.startsWith("#")) {
971
- initialFsmParse = new Array();
972
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("Hashtag"), true, true));
973
- fsmParse.constructInflectionalGroups();
974
- initialFsmParse.push(fsmParse);
975
- return initialFsmParse;
976
- }
977
- if (surfaceForm.includes("@")) {
978
- initialFsmParse = new Array();
979
- fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("Email"), true, true));
980
- fsmParse.constructInflectionalGroups();
981
- initialFsmParse.push(fsmParse);
982
- return initialFsmParse;
983
- }
984
- if (surfaceForm.endsWith(".") && this.isInteger(surfaceForm.substring(0, surfaceForm.length - 1))) {
985
- initialFsmParse = new Array();
986
- fsmParse = new FsmParse_1.FsmParse(Number.parseInt(surfaceForm.substring(0, surfaceForm.length - 1)), this.finiteStateMachine.getState("OrdinalRoot"));
987
- fsmParse.constructInflectionalGroups();
988
- initialFsmParse.push(fsmParse);
989
- return initialFsmParse;
990
- }
991
- if (this.isInteger(surfaceForm)) {
992
- initialFsmParse = new Array();
993
- fsmParse = new FsmParse_1.FsmParse(Number.parseInt(surfaceForm), this.finiteStateMachine.getState("CardinalRoot"));
994
- fsmParse.constructInflectionalGroups();
995
- initialFsmParse.push(fsmParse);
996
- return initialFsmParse;
997
- }
998
- if (this.isDouble(surfaceForm)) {
999
- initialFsmParse = new Array();
1000
- fsmParse = new FsmParse_1.FsmParse(Number.parseFloat(surfaceForm), this.finiteStateMachine.getState("RealRoot"));
1001
- fsmParse.constructInflectionalGroups();
1002
- initialFsmParse.push(fsmParse);
1003
- return initialFsmParse;
894
+ if (previousWordMultiple) {
895
+ for (; i < parseList.length; i++) {
896
+ result.addWord(original.getWord(i));
1004
897
  }
898
+ }
899
+ return result;
900
+ }
901
+ /**
902
+ * The analysisExists method checks several cases. If the given surfaceForm is a punctuation or double then it
903
+ * returns true. Otherwise it initializes the parse list, from rootWord when one is given and from surfaceForm when rootWord is null or undefined,
904
+ * and returns the result of the parseExists method for this newly initialized list and surfaceForm.
905
+ *
906
+ * @param rootWord TxtWord root.
907
+ * @param surfaceForm String input.
908
+ * @param isProper boolean variable indicates a word is proper or not.
909
+ * @return true if surfaceForm is punctuation or double, otherwise the result of the parseExists method for the given surfaceForm.
910
+ */
911
+ analysisExists(rootWord, surfaceForm, isProper) {
912
+ if (Word_1.Word.isPunctuation(surfaceForm)) {
913
+ return true;
914
+ }
915
+ if (this.isDouble(surfaceForm)) {
916
+ return true;
917
+ }
918
+ let initialFsmParse;
919
+ if (rootWord != null) {
920
+ initialFsmParse = new Array();
921
+ this.initializeParseListFromRoot(initialFsmParse, rootWord, isProper);
922
+ }
923
+ else {
1005
924
  initialFsmParse = this.initializeParseListFromSurfaceForm(surfaceForm, isProper);
1006
- return this.parseWordSurfaceForm(initialFsmParse, surfaceForm);
1007
925
  }
1008
- /**
1009
- * This method uses cache idea to speed up pattern matching in Fsm. mostUsedPatterns stores the compiled forms of
1010
- * the previously used patterns. When Fsm tries to match a string to a pattern, first we check if it exists in
1011
- * mostUsedPatterns. If it exists, we directly use the compiled pattern to match the string. Otherwise, new pattern
1012
- * is compiled and put in the mostUsedPatterns.
1013
- * @param expr Pattern to check
1014
- * @param value String to match the pattern
1015
- * @return True if the string matches the pattern, false otherwise.
1016
- */
1017
- patternMatches(expr, value) {
1018
- let p = this.mostUsedPatterns.get(expr);
1019
- if (p == undefined) {
1020
- p = RegExp(expr);
1021
- this.mostUsedPatterns.set(expr, p);
1022
- }
1023
- return value.match(p) != null;
1024
- }
1025
- /**
1026
- * The isProperNoun method takes surfaceForm String as input and checks its each char whether they are in the range
1027
- * of letters between A to Z or one of the Turkish letters such as İ, Ü, Ğ, Ş, Ç, and Ö.
1028
- *
1029
- * @param surfaceForm String to check for proper noun.
1030
- * @return false if surfaceForm is null or length of 0, return true if it is a letter.
1031
- */
1032
- isProperNoun(surfaceForm) {
1033
- if (surfaceForm == undefined || surfaceForm.length == 0) {
1034
- return false;
926
+ return this.parseExists(initialFsmParse, surfaceForm);
927
+ }
928
+ /**
929
+ * The analysis method is used by the morphologicalAnalysis method. It gets String surfaceForm as an input and checks
930
+ * its type such as punctuation, number or compares with the regex for date, fraction, percent, time, range, hashtag,
931
+ * and mail or checks its variable type as integer or double. After finding the right case for given surfaceForm, it calls
932
+ * constructInflectionalGroups method which creates sub-word units.
933
+ *
934
+ * @param surfaceForm String to analyse.
935
+ * @param isProper is used to indicate the proper words.
936
+ * @return ArrayList type initialFsmParse which holds the analyses.
937
+ */
938
+ analysis(surfaceForm, isProper) {
939
+ let initialFsmParse, fsmParse;
940
+ if (Word_1.Word.isPunctuation(surfaceForm) && surfaceForm != "%") {
941
+ initialFsmParse = new Array();
942
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("Punctuation"), true, true));
943
+ fsmParse.constructInflectionalGroups();
944
+ initialFsmParse.push(fsmParse);
945
+ return initialFsmParse;
946
+ }
947
+ if (this.isNumber(surfaceForm)) {
948
+ initialFsmParse = new Array();
949
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("CardinalRoot"), true, true));
950
+ fsmParse.constructInflectionalGroups();
951
+ initialFsmParse.push(fsmParse);
952
+ return initialFsmParse;
953
+ }
954
+ if (this.patternMatches("^\\d+/\\d+$", surfaceForm)) {
955
+ initialFsmParse = new Array();
956
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("FractionRoot"), true, true));
957
+ fsmParse.constructInflectionalGroups();
958
+ initialFsmParse.push(fsmParse);
959
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("DateRoot"), true, true));
960
+ fsmParse.constructInflectionalGroups();
961
+ initialFsmParse.push(fsmParse);
962
+ return initialFsmParse;
963
+ }
964
+ if (this.isDate(surfaceForm)) {
965
+ initialFsmParse = new Array();
966
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("DateRoot"), true, true));
967
+ fsmParse.constructInflectionalGroups();
968
+ initialFsmParse.push(fsmParse);
969
+ return initialFsmParse;
970
+ }
971
+ if (this.patternMatches("^\\d+\\\\/\\d+$", surfaceForm)) {
972
+ initialFsmParse = new Array();
973
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("FractionRoot"), true, true));
974
+ fsmParse.constructInflectionalGroups();
975
+ initialFsmParse.push(fsmParse);
976
+ return initialFsmParse;
977
+ }
978
+ if (surfaceForm == "%" || this.isPercent(surfaceForm)) {
979
+ initialFsmParse = new Array();
980
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("PercentRoot"), true, true));
981
+ fsmParse.constructInflectionalGroups();
982
+ initialFsmParse.push(fsmParse);
983
+ return initialFsmParse;
984
+ }
985
+ if (this.isTime(surfaceForm)) {
986
+ initialFsmParse = new Array();
987
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("TimeRoot"), true, true));
988
+ fsmParse.constructInflectionalGroups();
989
+ initialFsmParse.push(fsmParse);
990
+ return initialFsmParse;
991
+ }
992
+ if (this.isRange(surfaceForm)) {
993
+ initialFsmParse = new Array();
994
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("RangeRoot"), true, true));
995
+ fsmParse.constructInflectionalGroups();
996
+ initialFsmParse.push(fsmParse);
997
+ return initialFsmParse;
998
+ }
999
+ if (surfaceForm.startsWith("#")) {
1000
+ initialFsmParse = new Array();
1001
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("Hashtag"), true, true));
1002
+ fsmParse.constructInflectionalGroups();
1003
+ initialFsmParse.push(fsmParse);
1004
+ return initialFsmParse;
1005
+ }
1006
+ if (surfaceForm.includes("@")) {
1007
+ initialFsmParse = new Array();
1008
+ fsmParse = new FsmParse_1.FsmParse(surfaceForm, new State_1.State(("Email"), true, true));
1009
+ fsmParse.constructInflectionalGroups();
1010
+ initialFsmParse.push(fsmParse);
1011
+ return initialFsmParse;
1012
+ }
1013
+ if (surfaceForm.endsWith(".") && this.isInteger(surfaceForm.substring(0, surfaceForm.length - 1))) {
1014
+ initialFsmParse = new Array();
1015
+ fsmParse = new FsmParse_1.FsmParse(Number.parseInt(surfaceForm.substring(0, surfaceForm.length - 1)), this.finiteStateMachine.getState("OrdinalRoot"));
1016
+ fsmParse.constructInflectionalGroups();
1017
+ initialFsmParse.push(fsmParse);
1018
+ return initialFsmParse;
1019
+ }
1020
+ if (this.isInteger(surfaceForm)) {
1021
+ initialFsmParse = new Array();
1022
+ fsmParse = new FsmParse_1.FsmParse(Number.parseInt(surfaceForm), this.finiteStateMachine.getState("CardinalRoot"));
1023
+ fsmParse.constructInflectionalGroups();
1024
+ initialFsmParse.push(fsmParse);
1025
+ return initialFsmParse;
1026
+ }
1027
+ if (this.isDouble(surfaceForm)) {
1028
+ initialFsmParse = new Array();
1029
+ fsmParse = new FsmParse_1.FsmParse(Number.parseFloat(surfaceForm), this.finiteStateMachine.getState("RealRoot"));
1030
+ fsmParse.constructInflectionalGroups();
1031
+ initialFsmParse.push(fsmParse);
1032
+ return initialFsmParse;
1033
+ }
1034
+ initialFsmParse = this.initializeParseListFromSurfaceForm(surfaceForm, isProper);
1035
+ return this.parseWordSurfaceForm(initialFsmParse, surfaceForm);
1036
+ }
1037
+ /**
1038
+ * This method uses cache idea to speed up pattern matching in Fsm. mostUsedPatterns stores the compiled forms of
1039
+ * the previously used patterns. When Fsm tries to match a string to a pattern, first we check if it exists in
1040
+ * mostUsedPatterns. If it exists, we directly use the compiled pattern to match the string. Otherwise, new pattern
1041
+ * is compiled and put in the mostUsedPatterns.
1042
+ * @param expr Pattern to check
1043
+ * @param value String to match the pattern
1044
+ * @return True if the string matches the pattern, false otherwise.
1045
+ */
1046
+ patternMatches(expr, value) {
1047
+ let p = this.mostUsedPatterns.get(expr);
1048
+ if (p == undefined) {
1049
+ p = RegExp(expr);
1050
+ this.mostUsedPatterns.set(expr, p);
1051
+ }
1052
+ return value.match(p) != null;
1053
+ }
1054
+ /**
1055
+ * The isProperNoun method takes surfaceForm String as input and checks whether its first character is in the range
1056
+ * of letters A to Z or is one of the Turkish capital letters İ, Ü, Ğ, Ş, Ç, or Ö.
1057
+ *
1058
+ * @param surfaceForm String to check for proper noun.
1059
+ * @return false if surfaceForm is undefined or has length 0; true if its first character is A-Z or one of İ, Ü, Ğ, Ş, Ç, Ö; false otherwise.
1060
+ */
1061
+ isProperNoun(surfaceForm) {
1062
+ if (surfaceForm == undefined || surfaceForm.length == 0) {
1063
+ return false;
1064
+ }
1065
+ return (surfaceForm.charAt(0) >= 'A' && surfaceForm.charAt(0) <= 'Z') || (surfaceForm.charAt(0) == '\u0130') ||
1066
+ (surfaceForm.charAt(0) == '\u00dc') || (surfaceForm.charAt(0) == '\u011e') || (surfaceForm.charAt(0) == '\u015e') ||
1067
+ (surfaceForm.charAt(0) == '\u00c7') || (surfaceForm.charAt(0) == '\u00d6'); // İ, Ü, Ğ, Ş, Ç, Ö
1068
+ }
1069
+ /**
1070
+ * The isCode method takes surfaceForm String as input and checks if it consists of both letters and numbers
1071
+ *
1072
+ * @param surfaceForm String to check for code-like word.
1073
+ * @return true if it is a code-like word, return false otherwise.
1074
+ */
1075
+ isCode(surfaceForm) {
1076
+ if (surfaceForm == undefined || surfaceForm.length == 0) {
1077
+ return false;
1078
+ }
1079
+ return this.patternMatches("^.*[0-9].*$", surfaceForm) && this.patternMatches("^.*[a-zA-ZçöğüşıÇÖĞÜŞİ].*$", surfaceForm);
1080
+ }
1081
+ /**
1082
+ * Identifies a possible new root word for a given surface form. It also adds the new root form to the dictionary
1083
+ * for further usage. The method first searches the suffix trie for the reverse string of the surface form. This
1084
+ * way, it can identify if the word has a suffix that is in the most frequently used suffix list. Since a word can
1085
+ * have multiple possible suffixes, the method identifies the longest suffix and returns the substring of the
1086
+ * surface form that does not contain the suffix. Let us say the word is 'googlelaştırdık', it will identify 'dık' as
1087
+ * a suffix and will return 'googlelaştır' as a possible root form. Another example will be 'homelesslerimizle', it
1088
+ * will identify 'lerimizle' as suffix and will return 'homeless' as a possible root form. If the root word ends
1089
+ * with 'ğ', it is replaced with 'k'. 'morfolojikliğini' will return 'morfolojikliğ', which will then be replaced
1090
+ * with 'morfolojiklik'.
1091
+ * @param surfaceForm Surface form for which we will identify a possible new root form.
1092
+ * @return Possible new root form.
1093
+ */
1094
+ rootOfPossiblyNewWord(surfaceForm) {
1095
+ let words = this.suffixTrie.getWordsWithPrefix(this.reverseString(surfaceForm));
1096
+ let candidateWord = null;
1097
+ let candidateList = new Array();
1098
+ for (let word of words) {
1099
+ candidateWord = surfaceForm.substring(0, surfaceForm.length - word.getName().length);
1100
+ let newWord;
1101
+ if (candidateWord.endsWith("ğ")) {
1102
+ candidateWord = candidateWord.substring(0, candidateWord.length - 1) + "k";
1103
+ newWord = new TxtWord_1.TxtWord(candidateWord, "CL_ISIM");
1104
+ newWord.addFlag("IS_SD");
1035
1105
  }
1036
- return (surfaceForm.charAt(0) >= 'A' && surfaceForm.charAt(0) <= 'Z') || (surfaceForm.charAt(0) == '\u0130') ||
1037
- (surfaceForm.charAt(0) == '\u00dc') || (surfaceForm.charAt(0) == '\u011e') || (surfaceForm.charAt(0) == '\u015e') ||
1038
- (surfaceForm.charAt(0) == '\u00c7') || (surfaceForm.charAt(0) == '\u00d6'); // İ, Ü, Ğ, Ş, Ç, Ö
1039
- }
1040
- /**
1041
- * The isCode method takes surfaceForm String as input and checks if it consists of both letters and numbers
1042
- *
1043
- * @param surfaceForm String to check for code-like word.
1044
- * @return true if it is a code-like word, return false otherwise.
1045
- */
1046
- isCode(surfaceForm) {
1047
- if (surfaceForm == undefined || surfaceForm.length == 0) {
1048
- return false;
1106
+ else {
1107
+ newWord = new TxtWord_1.TxtWord(candidateWord, "CL_ISIM");
1108
+ newWord.addFlag("CL_FIIL");
1049
1109
  }
1050
- return this.patternMatches("^.*[0-9].*$", surfaceForm) && this.patternMatches("^.*[a-zA-ZçöğüşıÇÖĞÜŞİ].*$", surfaceForm);
1051
- }
1052
- /**
1053
- * Identifies a possible new root word for a given surface form. It also adds the new root form to the dictionary
1054
- * for further usage. The method first searches the suffix trie for the reverse string of the surface form. This
1055
- * way, it can identify if the word has a suffix that is in the most frequently used suffix list. Since a word can
1056
- * have multiple possible suffixes, the method identifies the longest suffix and returns the substring of the
1057
- * surface form that does not contain the suffix. Let us say the word is 'googlelaştırdık', it will identify 'dık' as
1058
- * a suffix and will return 'googlelaştır' as a possible root form. Another example will be 'homelesslerimizle', it
1059
- * will identify 'lerimizle' as suffix and will return 'homeless' as a possible root form. If the root word ends
1060
- * with 'ğ', it is replaced with 'k'. 'morfolojikliğini' will return 'morfolojikliğ', which will then be replaced
1061
- * with 'morfolojiklik'.
1062
- * @param surfaceForm Surface form for which we will identify a possible new root form.
1063
- * @return Possible new root form.
1064
- */
1065
- rootOfPossiblyNewWord(surfaceForm) {
1066
- let words = this.suffixTrie.getWordsWithPrefix(this.reverseString(surfaceForm));
1067
- let candidateWord = null;
1068
- let candidateList = new Array();
1069
- for (let word of words) {
1070
- candidateWord = surfaceForm.substring(0, surfaceForm.length - word.getName().length);
1071
- let newWord;
1072
- if (candidateWord.endsWith("ğ")) {
1073
- candidateWord = candidateWord.substring(0, candidateWord.length - 1) + "k";
1074
- newWord = new TxtWord_1.TxtWord(candidateWord, "CL_ISIM");
1075
- newWord.addFlag("IS_SD");
1076
- }
1077
- else {
1078
- newWord = new TxtWord_1.TxtWord(candidateWord, "CL_ISIM");
1079
- newWord.addFlag("CL_FIIL");
1080
- }
1081
- candidateList.push(newWord);
1082
- this.dictionaryTrie.addWord(candidateWord, newWord);
1110
+ candidateList.push(newWord);
1111
+ this.dictionaryTrie.addWord(candidateWord, newWord);
1112
+ }
1113
+ return candidateList;
1114
+ }
1115
+ /**
1116
+ * The robustMorphologicalAnalysis is used to analyse surfaceForm String. First it gets the currentParse of the surfaceForm
1117
+ * then, if the size of the currentParse is 0, and given surfaceForm is a proper noun, it adds the surfaceForm
1118
+ * whose state name is ProperRoot to an {@link Array}; if it is not a proper noun, it adds the surfaceForm
1119
+ * whose state name is NominalRoot to the {@link Array}.
1120
+ *
1121
+ * @param surfaceForm String to analyse.
1122
+ * @return FsmParseList type currentParse which holds morphological analysis of the surfaceForm.
1123
+ */
1124
+ robustMorphologicalAnalysis(surfaceForm) {
1125
+ if (surfaceForm == undefined || surfaceForm == "") {
1126
+ return new FsmParseList_1.FsmParseList(new Array());
1127
+ }
1128
+ let currentParse = this.morphologicalAnalysis(surfaceForm);
1129
+ if (currentParse.size() == 0) {
1130
+ let fsmParse = new Array();
1131
+ if (this.isProperNoun(surfaceForm)) {
1132
+ fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("ProperRoot")));
1083
1133
  }
1084
- return candidateList;
1085
- }
1086
- /**
1087
- * The robustMorphologicalAnalysis is used to analyse surfaceForm String. First it gets the currentParse of the surfaceForm
1088
- * then, if the size of the currentParse is 0, and given surfaceForm is a proper noun, it adds the surfaceForm
1089
- * whose state name is ProperRoot to an {@link Array}; if it is not a proper noun, it adds the surfaceForm
1090
- * whose state name is NominalRoot to the {@link Array}.
1091
- *
1092
- * @param surfaceForm String to analyse.
1093
- * @return FsmParseList type currentParse which holds morphological analysis of the surfaceForm.
1094
- */
1095
- robustMorphologicalAnalysis(surfaceForm) {
1096
- if (surfaceForm == undefined || surfaceForm == "") {
1097
- return new FsmParseList_1.FsmParseList(new Array());
1134
+ if (this.isCode(surfaceForm)) {
1135
+ fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("CodeRoot")));
1098
1136
  }
1099
- let currentParse = this.morphologicalAnalysis(surfaceForm);
1100
- if (currentParse.size() == 0) {
1101
- let fsmParse = new Array();
1102
- if (this.isProperNoun(surfaceForm)) {
1103
- fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("ProperRoot")));
1104
- }
1105
- if (this.isCode(surfaceForm)) {
1106
- fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("CodeRoot")));
1107
- }
1108
- let newCandidateList = this.rootOfPossiblyNewWord(surfaceForm);
1109
- if (newCandidateList.length != 0) {
1110
- for (let word of newCandidateList) {
1111
- fsmParse.push(new FsmParse_1.FsmParse(word, this.finiteStateMachine.getState("VerbalRoot")));
1112
- fsmParse.push(new FsmParse_1.FsmParse(word, this.finiteStateMachine.getState("NominalRoot")));
1113
- }
1137
+ let newCandidateList = this.rootOfPossiblyNewWord(surfaceForm);
1138
+ if (newCandidateList.length != 0) {
1139
+ for (let word of newCandidateList) {
1140
+ fsmParse.push(new FsmParse_1.FsmParse(word, this.finiteStateMachine.getState("VerbalRoot")));
1141
+ fsmParse.push(new FsmParse_1.FsmParse(word, this.finiteStateMachine.getState("NominalRoot")));
1114
1142
  }
1115
- fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("NominalRoot")));
1116
- return new FsmParseList_1.FsmParseList(this.parseWordSurfaceForm(fsmParse, surfaceForm));
1117
- }
1118
- else {
1119
- return currentParse;
1120
1143
  }
1144
+ fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("NominalRoot")));
1145
+ return new FsmParseList_1.FsmParseList(this.parseWordSurfaceForm(fsmParse, surfaceForm));
1121
1146
  }
1122
- /**
1123
- * The morphologicalAnalysis is used for debug purposes.
1124
- *
1125
- * @param sentence to get word from.
1126
- * @return FsmParseList type result.
1127
- */
1128
- morphologicalAnalysisFromSentence(sentence) {
1129
- let result = new Array();
1130
- for (let i = 0; i < sentence.wordCount(); i++) {
1131
- let originalForm = sentence.getWord(i).getName();
1132
- let spellCorrectedForm = this.dictionary.getCorrectForm(originalForm);
1133
- if (spellCorrectedForm == undefined) {
1134
- spellCorrectedForm = originalForm;
1135
- }
1136
- let wordFsmParseList = this.morphologicalAnalysis(spellCorrectedForm);
1137
- result.push(wordFsmParseList);
1138
- }
1139
- return result;
1147
+ else {
1148
+ return currentParse;
1140
1149
  }
1141
- /**
1142
- * The robustMorphologicalAnalysis method takes just one argument as an input. It gets the name of the words from
1143
- * input sentence then calls robustMorphologicalAnalysis with surfaceForm.
1144
- *
1145
- * @param sentence Sentence type input used to get surfaceForm.
1146
- * @return FsmParseList array which holds the result of the analysis.
1147
- */
1148
- robustMorphologicalAnalysisFromSentence(sentence) {
1149
- let result = new Array();
1150
- for (let i = 0; i < sentence.wordCount(); i++) {
1151
- let originalForm = sentence.getWord(i).getName();
1152
- let spellCorrectedForm = this.dictionary.getCorrectForm(originalForm);
1153
- if (spellCorrectedForm == undefined) {
1154
- spellCorrectedForm = originalForm;
1155
- }
1156
- let fsmParseList = this.robustMorphologicalAnalysis(spellCorrectedForm);
1157
- result.push(fsmParseList);
1158
- }
1159
- return result;
1150
+ }
1151
+ /**
1152
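+ * The morphologicalAnalysisFromSentence method runs morphologicalAnalysis on the spell-corrected form of every word in the given sentence. It is used for debug purposes.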
1153
+ *
1154
+ * @param sentence to get word from.
1155
+ * @return Array of FsmParseList, one entry per word of the sentence.
1156
+ */
1157
+ morphologicalAnalysisFromSentence(sentence) {
1158
+ let result = new Array();
1159
+ for (let i = 0; i < sentence.wordCount(); i++) {
1160
+ let originalForm = sentence.getWord(i).getName();
1161
+ let spellCorrectedForm = this.dictionary.getCorrectForm(originalForm);
1162
+ if (spellCorrectedForm == undefined) {
1163
+ spellCorrectedForm = originalForm;
1164
+ }
1165
+ let wordFsmParseList = this.morphologicalAnalysis(spellCorrectedForm);
1166
+ result.push(wordFsmParseList);
1167
+ }
1168
+ return result;
1169
+ }
1170
+ /**
1171
+ * The robustMorphologicalAnalysis method takes just one argument as an input. It gets the name of the words from
1172
+ * input sentence then calls robustMorphologicalAnalysis with surfaceForm.
1173
+ *
1174
+ * @param sentence Sentence type input used to get surfaceForm.
1175
+ * @return FsmParseList array which holds the result of the analysis.
1176
+ */
1177
+ robustMorphologicalAnalysisFromSentence(sentence) {
1178
+ let result = new Array();
1179
+ for (let i = 0; i < sentence.wordCount(); i++) {
1180
+ let originalForm = sentence.getWord(i).getName();
1181
+ let spellCorrectedForm = this.dictionary.getCorrectForm(originalForm);
1182
+ if (spellCorrectedForm == undefined) {
1183
+ spellCorrectedForm = originalForm;
1184
+ }
1185
+ let fsmParseList = this.robustMorphologicalAnalysis(spellCorrectedForm);
1186
+ result.push(fsmParseList);
1160
1187
  }
1161
- /**
1162
- * The isInteger method compares input surfaceForm with regex \+?\d+ and returns the result.
1163
- * Supports positive integer checks only.
1164
- *
1165
- * @param surfaceForm String to check.
1166
- * @return true if surfaceForm matches with the regex.
1167
- */
1168
- isInteger(surfaceForm) {
1169
- if (!this.patternMatches("^[+-]?\\d+$", surfaceForm))
1188
+ return result;
1189
+ }
1190
+ /**
1191
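+ * The isInteger method first checks input surfaceForm against the regex ^[+-]?\d+$ and then restricts it by length.
1192
+ * Forms shorter than 10 characters are accepted and forms longer than 10 are rejected. NOTE(review): a 10-character form is accepted only when surfaceForm >= "2147483647" as a string, which looks inverted for a 32-bit upper bound - confirm.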
1193
+ *
1194
+ * @param surfaceForm String to check.
1195
+ * @return true if surfaceForm matches with the regex.
1196
+ */
1197
+ isInteger(surfaceForm) {
1198
+ if (!this.patternMatches("^[+-]?\\d+$", surfaceForm))
1199
+ return false;
1200
+ let len = surfaceForm.length;
1201
+ if (len < 10) {
1202
+ return true;
1203
+ }
1204
+ else {
1205
+ if (len > 10) {
1170
1206
  return false;
1171
- let len = surfaceForm.length;
1172
- if (len < 10) {
1173
- return true;
1174
1207
  }
1175
1208
  else {
1176
- if (len > 10) {
1177
- return false;
1178
- }
1179
- else {
1180
- return surfaceForm >= "2147483647";
1181
- }
1209
+ return surfaceForm >= "2147483647";
1182
1210
  }
1183
1211
  }
1184
- /**
1185
- * The isDouble method compares input surfaceForm with regex \+?(\d+)?\.\d* and returns the result.
1186
- *
1187
- * @param surfaceForm String to check.
1188
- * @return true if surfaceForm matches with the regex.
1189
- */
1190
- isDouble(surfaceForm) {
1191
- return this.patternMatches("^[+-]?(\\d+)?\\.\\d*$", surfaceForm);
1192
- }
1193
- /**
1194
- * The isNumber method compares input surfaceForm with the array of written numbers and returns the result.
1195
- *
1196
- * @param surfaceForm String to check.
1197
- * @return true if surfaceForm matches with the regex.
1198
- */
1199
- isNumber(surfaceForm) {
1200
- let count = 0;
1201
- let numbers = ["bir", "iki", "üç", "dört", "beş", "altı", "yedi", "sekiz", "dokuz",
1202
- "on", "yirmi", "otuz", "kırk", "elli", "altmış", "yetmiş", "seksen", "doksan",
1203
- "yüz", "bin", "milyon", "milyar", "trilyon", "katrilyon"];
1204
- let word = surfaceForm;
1205
- while (word != "") {
1206
- let found = false;
1207
- for (let number of numbers) {
1208
- if (word.startsWith(number)) {
1209
- found = true;
1210
- count++;
1211
- word = word.substring(number.length);
1212
- break;
1213
- }
1214
- }
1215
- if (!found) {
1212
+ }
1213
+ /**
1214
+ * The isDouble method compares input surfaceForm with regex ^[+-]?(\d+)?\.\d*$ and returns the result.
1215
+ *
1216
+ * @param surfaceForm String to check.
1217
+ * @return true if surfaceForm matches with the regex.
1218
+ */
1219
+ isDouble(surfaceForm) {
1220
+ return this.patternMatches("^[+-]?(\\d+)?\\.\\d*$", surfaceForm);
1221
+ }
1222
+ /**
1223
+ * The isNumber method checks whether input surfaceForm is a concatenation of Turkish written number words (bir, iki, ..., katrilyon).
1224
+ *
1225
+ * @param surfaceForm String to check.
1226
+ * @return true if surfaceForm consists entirely of two or more written number words, false otherwise.
1227
+ */
1228
+ isNumber(surfaceForm) {
1229
+ let count = 0;
1230
+ let numbers = ["bir", "iki", "üç", "dört", "beş", "altı", "yedi", "sekiz", "dokuz",
1231
+ "on", "yirmi", "otuz", "kırk", "elli", "altmış", "yetmiş", "seksen", "doksan",
1232
+ "yüz", "bin", "milyon", "milyar", "trilyon", "katrilyon"];
1233
+ let word = surfaceForm;
1234
+ while (word != "") {
1235
+ let found = false;
1236
+ for (let number of numbers) {
1237
+ if (word.startsWith(number)) {
1238
+ found = true;
1239
+ count++;
1240
+ word = word.substring(number.length);
1216
1241
  break;
1217
1242
  }
1218
1243
  }
1219
- return word == "" && count > 1;
1220
- }
1221
- /**
1222
- * Checks if a given surface form matches to a percent value. It should be something like %4, %45, %4.3 or %56.786
1223
- * @param surfaceForm Surface form to be checked.
1224
- * @return True if the surface form is in percent form
1225
- */
1226
- isPercent(surfaceForm) {
1227
- return this.patternMatches("^%(\\d\\d|\\d)$", surfaceForm) ||
1228
- this.patternMatches("^%(\\d\\d|\\d)\\.\\d+$", surfaceForm);
1229
- }
1230
- /**
1231
- * Checks if a given surface form matches to a time form. It should be something like 3:34, 12:56 etc.
1232
- * @param surfaceForm Surface form to be checked.
1233
- * @return True if the surface form is in time form
1234
- */
1235
- isTime(surfaceForm) {
1236
- return this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm) ||
1237
- this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm);
1238
- }
1239
- /**
1240
- * Checks if a given surface form matches to a range form. It should be something like 123-1400 or 12:34-15:78 or
1241
- * 3.45-4.67.
1242
- * @param surfaceForm Surface form to be checked.
1243
- * @return True if the surface form is in range form
1244
- */
1245
- isRange(surfaceForm) {
1246
- return this.patternMatches("^\\d+-\\d+$", surfaceForm) ||
1247
- this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d)-(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm) ||
1248
- this.patternMatches("^(\\d\\d|\\d)\\.(\\d\\d|\\d)-(\\d\\d|\\d)\\.(\\d\\d|\\d)$", surfaceForm);
1249
- }
1250
- /**
1251
- * Checks if a given surface form matches to a date form. It should be something like 3/10/2023 or 2.3.2012
1252
- * @param surfaceForm Surface form to be checked.
1253
- * @return True if the surface form is in date form
1254
- */
1255
- isDate(surfaceForm) {
1256
- return this.patternMatches("^(\\d\\d|\\d)/(\\d\\d|\\d)/\\d+$", surfaceForm) ||
1257
- this.patternMatches("^(\\d\\d|\\d)\\.(\\d\\d|\\d)\\.\\d+$", surfaceForm);
1258
- }
1259
- /**
1260
- * The morphologicalAnalysis method is used to analyse a FsmParseList by comparing with the regex.
1261
- * It creates an {@link Array} fsmParse to hold the result of the analysis method. For each surfaceForm input,
1262
- * it gets a substring and considers it as a possibleRoot. Then compares with the regex.
1263
- * <p>
1264
- * If the surfaceForm input string matches with Turkish chars like Ç, Ş, İ, Ü, Ö, it adds the surfaceForm to Trie with IS_OA tag.
1265
- * If the possibleRoot contains /, then it is added to the Trie with IS_KESIR tag.
1266
- * If the possibleRoot contains \d\d|\d)/(\d\d|\d)/\d+, then it is added to the Trie with IS_DATE tag.
1267
- * If the possibleRoot contains \\d\d|\d, then it is added to the Trie with IS_PERCENT tag.
1268
- * If the possibleRoot contains \d\d|\d):(\d\d|\d):(\d\d|\d), then it is added to the Trie with IS_ZAMAN tag.
1269
- * If the possibleRoot contains \d+-\d+, then it is added to the Trie with IS_RANGE tag.
1270
- * If the possibleRoot is an Integer, then it is added to the Trie with IS_SAYI tag.
1271
- * If the possibleRoot is a Double, then it is added to the Trie with IS_REELSAYI tag.
1272
- *
1273
- * @param surfaceForm String to analyse.
1274
- * @return fsmParseList which holds the analysis.
1275
- */
1276
- morphologicalAnalysis(surfaceForm) {
1277
- let lowerCased = surfaceForm.toLocaleLowerCase("tr");
1278
- let possibleRootLowerCased = "", pronunciation = "";
1279
- let isRootReplaced = false;
1280
- if (this.parsedSurfaceForms != undefined && this.parsedSurfaceForms.has(lowerCased) &&
1281
- !this.isInteger(surfaceForm) && !this.isDouble(surfaceForm) && !this.isPercent(surfaceForm) &&
1282
- !this.isTime(surfaceForm) && !this.isRange(surfaceForm) && !this.isDate(surfaceForm)) {
1283
- let parses = new Array();
1284
- parses.push(new FsmParse_1.FsmParse(new Word_1.Word(this.parsedSurfaceForms.get(lowerCased))));
1285
- return new FsmParseList_1.FsmParseList(parses);
1286
- }
1287
- if (this.cache != undefined && this.cache.contains(surfaceForm)) {
1288
- return this.cache.get(surfaceForm);
1244
+ if (!found) {
1245
+ break;
1289
1246
  }
1290
- if (this.patternMatches("^(\\w|Ç|Ş|İ|Ü|Ö)\\.$", surfaceForm)) {
1291
- this.dictionaryTrie.addWord(lowerCased, new TxtWord_1.TxtWord(lowerCased, "IS_OA"));
1247
+ }
1248
+ return word == "" && count > 1;
1249
+ }
1250
+ /**
1251
+ * Checks if a given surface form matches to a percent value. It should be something like %4, %45, %4.3 or %56.786
1252
+ * @param surfaceForm Surface form to be checked.
1253
+ * @return True if the surface form is in percent form
1254
+ */
1255
+ isPercent(surfaceForm) {
1256
+ return this.patternMatches("^%(\\d\\d|\\d)$", surfaceForm) ||
1257
+ this.patternMatches("^%(\\d\\d|\\d)\\.\\d+$", surfaceForm);
1258
+ }
1259
+ /**
1260
+ * Checks if a given surface form matches to a time form. It should be something like 3:34, 12:56 etc.
1261
+ * @param surfaceForm Surface form to be checked.
1262
+ * @return True if the surface form is in time form
1263
+ */
1264
+ isTime(surfaceForm) {
1265
+ return this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm) ||
1266
+ this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm);
1267
+ }
1268
+ /**
1269
+ * Checks if a given surface form matches to a range form. It should be something like 123-1400 or 12:34-15:78 or
1270
+ * 3.45-4.67.
1271
+ * @param surfaceForm Surface form to be checked.
1272
+ * @return True if the surface form is in range form
1273
+ */
1274
+ isRange(surfaceForm) {
1275
+ return this.patternMatches("^\\d+-\\d+$", surfaceForm) ||
1276
+ this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d)-(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm) ||
1277
+ this.patternMatches("^(\\d\\d|\\d)\\.(\\d\\d|\\d)-(\\d\\d|\\d)\\.(\\d\\d|\\d)$", surfaceForm);
1278
+ }
1279
+ /**
1280
+ * Checks if a given surface form matches to a date form. It should be something like 3/10/2023 or 2.3.2012
1281
+ * @param surfaceForm Surface form to be checked.
1282
+ * @return True if the surface form is in date form
1283
+ */
1284
+ isDate(surfaceForm) {
1285
+ return this.patternMatches("^(\\d\\d|\\d)/(\\d\\d|\\d)/\\d+$", surfaceForm) ||
1286
+ this.patternMatches("^(\\d\\d|\\d)\\.(\\d\\d|\\d)\\.\\d+$", surfaceForm);
1287
+ }
1288
+ /**
1289
+ * The morphologicalAnalysis method is used to analyse a FsmParseList by comparing with the regex.
1290
+ * It creates an {@link Array} fsmParse to hold the result of the analysis method. For each surfaceForm input,
1291
+ * it gets a substring and considers it as a possibleRoot. Then compares with the regex.
1292
+ * <p>
1293
+ * If the surfaceForm is a single word character or one of Ç, Ş, İ, Ü, Ö followed by a period, it adds the lower-cased surfaceForm to Trie with IS_OA tag.
1294
+ * If the possibleRoot contains /, then it is added to the Trie with IS_KESIR tag.
1295
+ * If the possibleRoot matches (\d\d|\d)/(\d\d|\d)/\d+ or (\d\d|\d)\.(\d\d|\d)\.\d+, then it is added to the Trie with IS_DATE tag.
1296
+ * If the possibleRoot matches %(\d\d|\d) or %(\d\d|\d)\.\d+, then it is added to the Trie with IS_PERCENT tag.
1297
+ * If the surfaceForm matches (\d\d|\d):(\d\d|\d):(\d\d|\d) or (\d\d|\d):(\d\d|\d), then the possibleRoot is added to the Trie with IS_ZAMAN tag.
1298
+ * If the possibleRoot contains \d+-\d+, then it is added to the Trie with IS_RANGE tag.
1299
+ * If the possibleRoot is an Integer, then it is added to the Trie with IS_SAYI tag.
1300
+ * If the possibleRoot is a Double, then it is added to the Trie with IS_REELSAYI tag.
1301
+ *
1302
+ * @param surfaceForm String to analyse.
1303
+ * @return fsmParseList which holds the analysis.
1304
+ */
1305
+ morphologicalAnalysis(surfaceForm) {
1306
+ let lowerCased = surfaceForm.toLocaleLowerCase("tr");
1307
+ let possibleRootLowerCased = "", pronunciation = "";
1308
+ let isRootReplaced = false;
1309
+ if (this.parsedSurfaceForms != undefined && this.parsedSurfaceForms.has(lowerCased) &&
1310
+ !this.isInteger(surfaceForm) && !this.isDouble(surfaceForm) && !this.isPercent(surfaceForm) &&
1311
+ !this.isTime(surfaceForm) && !this.isRange(surfaceForm) && !this.isDate(surfaceForm)) {
1312
+ let parses = new Array();
1313
+ parses.push(new FsmParse_1.FsmParse(new Word_1.Word(this.parsedSurfaceForms.get(lowerCased))));
1314
+ return new FsmParseList_1.FsmParseList(parses);
1315
+ }
1316
+ if (this.cache != undefined && this.cache.contains(surfaceForm)) {
1317
+ return this.cache.get(surfaceForm);
1318
+ }
1319
+ if (this.patternMatches("^(\\w|Ç|Ş|İ|Ü|Ö)\\.$", surfaceForm)) {
1320
+ this.dictionaryTrie.addWord(lowerCased, new TxtWord_1.TxtWord(lowerCased, "IS_OA"));
1321
+ }
1322
+ let defaultFsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1323
+ if (defaultFsmParse.length > 0) {
1324
+ let fsmParseList = new FsmParseList_1.FsmParseList(defaultFsmParse);
1325
+ if (this.cache != undefined) {
1326
+ this.cache.add(surfaceForm, fsmParseList);
1292
1327
  }
1293
- let defaultFsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1294
- if (defaultFsmParse.length > 0) {
1295
- let fsmParseList = new FsmParseList_1.FsmParseList(defaultFsmParse);
1296
- if (this.cache != undefined) {
1297
- this.cache.add(surfaceForm, fsmParseList);
1328
+ return fsmParseList;
1329
+ }
1330
+ let fsmParse = new Array();
1331
+ if (surfaceForm.includes("'")) {
1332
+ let possibleRoot = surfaceForm.substring(0, surfaceForm.indexOf('\''));
1333
+ if (possibleRoot != "") {
1334
+ if (possibleRoot.includes("/") || possibleRoot.includes("\\/")) {
1335
+ this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_KESIR"));
1336
+ fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1298
1337
  }
1299
- return fsmParseList;
1300
- }
1301
- let fsmParse = new Array();
1302
- if (surfaceForm.includes("'")) {
1303
- let possibleRoot = surfaceForm.substring(0, surfaceForm.indexOf('\''));
1304
- if (possibleRoot != "") {
1305
- if (possibleRoot.includes("/") || possibleRoot.includes("\\/")) {
1306
- this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_KESIR"));
1338
+ else {
1339
+ if (this.isDate(possibleRoot)) {
1340
+ this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_DATE"));
1307
1341
  fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1308
1342
  }
1309
1343
  else {
1310
- if (this.isDate(possibleRoot)) {
1311
- this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_DATE"));
1344
+ if (this.patternMatches("^\\d+/\\d+$", possibleRoot)) {
1345
+ this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_KESIR"));
1312
1346
  fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1313
1347
  }
1314
1348
  else {
1315
- if (this.patternMatches("^\\d+/\\d+$", possibleRoot)) {
1316
- this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_KESIR"));
1349
+ if (this.isPercent(possibleRoot)) {
1350
+ this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_PERCENT"));
1317
1351
  fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1318
1352
  }
1319
1353
  else {
1320
- if (this.isPercent(possibleRoot)) {
1321
- this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_PERCENT"));
1354
+ if (this.isTime(surfaceForm)) {
1355
+ this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_ZAMAN"));
1322
1356
  fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1323
1357
  }
1324
1358
  else {
1325
- if (this.isTime(surfaceForm)) {
1326
- this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_ZAMAN"));
1359
+ if (this.isRange(surfaceForm)) {
1360
+ this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_RANGE"));
1327
1361
  fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1328
1362
  }
1329
1363
  else {
1330
- if (this.isRange(surfaceForm)) {
1331
- this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_RANGE"));
1364
+ if (this.isInteger(possibleRoot)) {
1365
+ this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_SAYI"));
1332
1366
  fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1333
1367
  }
1334
1368
  else {
1335
- if (this.isInteger(possibleRoot)) {
1336
- this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_SAYI"));
1369
+ if (this.isDouble(possibleRoot)) {
1370
+ this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_REELSAYI"));
1337
1371
  fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1338
1372
  }
1339
1373
  else {
1340
- if (this.isDouble(possibleRoot)) {
1341
- this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_REELSAYI"));
1342
- fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1343
- }
1344
- else {
1345
- if (Word_1.Word.isCapital(possibleRoot) || "QXW".includes(possibleRoot.substring(0, 1))) {
1346
- let newWord = undefined;
1347
- possibleRootLowerCased = possibleRoot.toLocaleLowerCase("tr");
1348
- if (this.pronunciations.has(possibleRootLowerCased)) {
1349
- isRootReplaced = true;
1350
- pronunciation = this.pronunciations.get(possibleRootLowerCased);
1351
- if (this.dictionary.getWord(pronunciation) != null) {
1352
- this.dictionary.getWord(pronunciation).addFlag("IS_OA");
1353
- }
1354
- else {
1355
- newWord = new TxtWord_1.TxtWord(pronunciation, "IS_OA");
1356
- this.dictionaryTrie.addWord(pronunciation, newWord);
1357
- }
1358
- let replacedWord = pronunciation + lowerCased.substring(possibleRootLowerCased.length);
1359
- fsmParse = this.analysis(replacedWord, this.isProperNoun(surfaceForm));
1374
+ if (Word_1.Word.isCapital(possibleRoot) || "QXW".includes(possibleRoot.substring(0, 1))) {
1375
+ let newWord = undefined;
1376
+ possibleRootLowerCased = possibleRoot.toLocaleLowerCase("tr");
1377
+ if (this.pronunciations.has(possibleRootLowerCased)) {
1378
+ isRootReplaced = true;
1379
+ pronunciation = this.pronunciations.get(possibleRootLowerCased);
1380
+ if (this.dictionary.getWord(pronunciation) != null) {
1381
+ this.dictionary.getWord(pronunciation).addFlag("IS_OA");
1360
1382
  }
1361
1383
  else {
1362
- if (this.dictionary.getWord(possibleRootLowerCased) != null) {
1363
- this.dictionary.getWord(possibleRootLowerCased).addFlag("IS_OA");
1364
- }
1365
- else {
1366
- newWord = new TxtWord_1.TxtWord(possibleRootLowerCased, "IS_OA");
1367
- this.dictionaryTrie.addWord(possibleRootLowerCased, newWord);
1368
- }
1369
- fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1384
+ newWord = new TxtWord_1.TxtWord(pronunciation, "IS_OA");
1385
+ this.dictionaryTrie.addWord(pronunciation, newWord);
1370
1386
  }
1387
+ let replacedWord = pronunciation + lowerCased.substring(possibleRootLowerCased.length);
1388
+ fsmParse = this.analysis(replacedWord, this.isProperNoun(surfaceForm));
1389
+ }
1390
+ else {
1391
+ if (this.dictionary.getWord(possibleRootLowerCased) != null) {
1392
+ this.dictionary.getWord(possibleRootLowerCased).addFlag("IS_OA");
1393
+ }
1394
+ else {
1395
+ newWord = new TxtWord_1.TxtWord(possibleRootLowerCased, "IS_OA");
1396
+ this.dictionaryTrie.addWord(possibleRootLowerCased, newWord);
1397
+ }
1398
+ fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
1371
1399
  }
1372
1400
  }
1373
1401
  }
@@ -1379,30 +1407,29 @@
1379
1407
  }
1380
1408
  }
1381
1409
  }
1382
- if (!isRootReplaced) {
1383
- for (let parse of fsmParse) {
1384
- parse.restoreOriginalForm(possibleRootLowerCased, pronunciation);
1385
- }
1386
- }
1387
- let fsmParseList = new FsmParseList_1.FsmParseList(fsmParse);
1388
- if (this.cache != undefined && fsmParseList.size() > 0) {
1389
- this.cache.add(surfaceForm, fsmParseList);
1410
+ }
1411
+ if (!isRootReplaced) {
1412
+ for (let parse of fsmParse) {
1413
+ parse.restoreOriginalForm(possibleRootLowerCased, pronunciation);
1390
1414
  }
1391
- return fsmParseList;
1392
1415
  }
1393
- /**
1394
- * The morphologicalAnalysisExists method calls analysisExists to check the existence of the analysis with given
1395
- * root and surfaceForm.
1396
- *
1397
- * @param surfaceForm String to check.
1398
- * @param rootWord TxtWord input root.
1399
- * @return true if an analysis exists, otherwise false.
1400
- */
1401
- morphologicalAnalysisExists(rootWord, surfaceForm) {
1402
- return this.analysisExists(rootWord, surfaceForm.toLocaleLowerCase("tr"), true);
1416
+ let fsmParseList = new FsmParseList_1.FsmParseList(fsmParse);
1417
+ if (this.cache != undefined && fsmParseList.size() > 0) {
1418
+ this.cache.add(surfaceForm, fsmParseList);
1403
1419
  }
1420
+ return fsmParseList;
1404
1421
  }
1405
- exports.FsmMorphologicalAnalyzer = FsmMorphologicalAnalyzer;
1406
- FsmMorphologicalAnalyzer.MAX_DISTANCE = 2;
1407
- });
1422
+ /**
1423
+ * Checks whether a morphological analysis exists for the given root word and surface form by delegating
1424
+ * to analysisExists with the surface form lowercased under the Turkish ("tr") locale and a literal true flag.
1425
+ *
1426
+ * @param rootWord TxtWord input root.
1427
+ * @param surfaceForm String to check; lowercased with the "tr" locale before the lookup.
1428
+ * @return the result of analysisExists: true if an analysis exists, otherwise false.
1429
+ */
1430
+ morphologicalAnalysisExists(rootWord, surfaceForm) {
1431
+ return this.analysisExists(rootWord, surfaceForm.toLocaleLowerCase("tr"), true);
1432
+ }
1433
+ }
1434
+ exports.FsmMorphologicalAnalyzer = FsmMorphologicalAnalyzer;
1408
1435
  //# sourceMappingURL=FsmMorphologicalAnalyzer.js.map