npm - nlptoolkit-morphologicalanalysis - Versions diffs - 1.0.14 → 1.0.15 - Mend

nlptoolkit-morphologicalanalysis 1.0.14 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83) hide show

package/README.md +1 -1
package/dist/Corpus/DisambiguationCorpus.d.ts +7 -0
package/dist/Corpus/DisambiguationCorpus.js +7 -0
package/dist/Corpus/DisambiguationCorpus.js.map +1 -1
package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.d.ts +67 -3
package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js +136 -64
package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js.map +1 -1
package/dist/MorphologicalAnalysis/FsmParse.d.ts +9 -0
package/dist/MorphologicalAnalysis/FsmParse.js +15 -0
package/dist/MorphologicalAnalysis/FsmParse.js.map +1 -1
package/dist/MorphologicalAnalysis/InflectionalGroup.js +3 -2
package/dist/MorphologicalAnalysis/InflectionalGroup.js.map +1 -1
package/dist/MorphologicalAnalysis/MorphologicalParse.d.ts +98 -0
package/dist/MorphologicalAnalysis/MorphologicalParse.js +161 -10
package/dist/MorphologicalAnalysis/MorphologicalParse.js.map +1 -1
package/dist/MorphologicalAnalysis/MorphologicalTag.d.ts +9 -1
package/dist/MorphologicalAnalysis/MorphologicalTag.js +8 -0
package/dist/MorphologicalAnalysis/MorphologicalTag.js.map +1 -1
package/dist/MorphologicalAnalysis/MorphotacticEngine.d.ts +47 -0
package/dist/MorphologicalAnalysis/MorphotacticEngine.js +51 -1
package/dist/MorphologicalAnalysis/MorphotacticEngine.js.map +1 -1
package/dist/MorphologicalAnalysis/Transition.d.ts +22 -8
package/dist/MorphologicalAnalysis/Transition.js +25 -9
package/dist/MorphologicalAnalysis/Transition.js.map +1 -1
package/package.json +2 -2
package/parses/ac/314/247/304/261kla.txt +57 -3
package/parses/ak.txt +72 -3
package/parses/aksa.txt +40 -2
package/parses/anla.txt +57 -3
package/parses/azal.txt +63 -4
package/parses/bo/314/210l.txt +53 -3
package/parses/bul.txt +53 -3
package/parses/cenk.txt +8 -0
package/parses/cevapla.txt +74 -4
package/parses/cos/314/247.txt +53 -3
package/parses/c/314/247o/314/210k.txt +54 -3
package/parses/c/314/247/304/261k.txt +59 -3
package/parses/del.txt +47 -3
package/parses/doldur.txt +47 -3
package/parses/emlak.txt +2 -0
package/parses/git.txt +59 -3
package/parses/giy.txt +59 -3
package/parses/go/314/210c/314/247.txt +59 -3
package/parses/go/314/210ster.txt +63 -4
package/parses/hal.txt +20 -4
package/parses/kalp.txt +29 -4
package/parses/kavur.txt +80 -5
package/parses/kaydol.txt +69 -4
package/parses/resim.txt +14 -0
package/parses/s/304/261ska.txt +24 -0
package/parses/ye.txt +40 -2
package/parses/yemek.txt +6 -0
package/parses/y/304/261ka.txt +90 -5
package/parses/y/304/261ldo/314/210nu/314/210mu/314/210.txt +6 -0
package/pronunciations.txt +490 -0
package/source/Corpus/DisambiguationCorpus.ts +7 -0
package/source/MorphologicalAnalysis/FsmMorphologicalAnalyzer.ts +141 -67
package/source/MorphologicalAnalysis/FsmParse.ts +16 -1
package/source/MorphologicalAnalysis/InflectionalGroup.ts +3 -2
package/source/MorphologicalAnalysis/MorphologicalParse.ts +161 -10
package/source/MorphologicalAnalysis/MorphologicalTag.ts +9 -1
package/source/MorphologicalAnalysis/MorphotacticEngine.ts +51 -1
package/source/MorphologicalAnalysis/Transition.ts +25 -9
package/tests/DisambiguationCorpusTest.js +14 -0
package/tests/DisambiguationCorpusTest.js.map +1 -0
package/tests/FiniteStateMachineTest.js +96 -0
package/tests/FiniteStateMachineTest.js.map +1 -0
package/tests/FiniteStateMachineTest.ts +1 -1
package/tests/FsmMorphologicalAnalyzerTest.js +250 -0
package/tests/FsmMorphologicalAnalyzerTest.js.map +1 -0
package/tests/FsmMorphologicalAnalyzerTest.ts +9 -10
package/tests/FsmParseListTest.js +100 -0
package/tests/FsmParseListTest.js.map +1 -0
package/tests/FsmParseTest.js +68 -0
package/tests/FsmParseTest.js.map +1 -0
package/tests/InflectionalGroupTest.js +86 -0
package/tests/InflectionalGroupTest.js.map +1 -0
package/tests/MorphologicalParseTest.js +154 -0
package/tests/MorphologicalParseTest.js.map +1 -0
package/tests/TransitionTest.js +184 -0
package/tests/TransitionTest.js.map +1 -0
package/tests/TransitionTest.ts +8 -0
package/turkish_finite_state_machine.xml +11 -3

package/README.md CHANGED Viewed

@@ -32,7 +32,7 @@ For Developers
 ============
 You can also see [Python](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Py),
-[Java](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis), [C++](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-CPP),
+[Java](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis), [C++](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-CPP), [C](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-C),
 [Swift](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Swift), [Cython](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Cy),
 or [C#](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-CS) repository.

package/dist/Corpus/DisambiguationCorpus.d.ts CHANGED Viewed

@@ -1,4 +1,11 @@
 import { Corpus } from "nlptoolkit-corpus/dist/Corpus";
 export declare class DisambiguationCorpus extends Corpus {
+    /**
+     * Constructor which takes a file name {@link String} as an input and reads the file line by line. It takes each word of the line,
+     * and creates a new {@link DisambiguatedWord} with current word and its {@link MorphologicalParse}. It also creates a new {@link Sentence}
+     * when a new sentence starts, and adds each word to this sentence till the end of that sentence.
+     *
+     * @param fileName File which will be read and parsed.
+     */
     constructor(fileName?: string);
 }

package/dist/Corpus/DisambiguationCorpus.js CHANGED Viewed

@@ -16,6 +16,13 @@
     const MorphologicalParse_1 = require("../MorphologicalAnalysis/MorphologicalParse");
     const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
     class DisambiguationCorpus extends Corpus_1.Corpus {
+        /**
+         * Constructor which takes a file name {@link String} as an input and reads the file line by line. It takes each word of the line,
+         * and creates a new {@link DisambiguatedWord} with current word and its {@link MorphologicalParse}. It also creates a new {@link Sentence}
+         * when a new sentence starts, and adds each word to this sentence till the end of that sentence.
+         *
+         * @param fileName File which will be read and parsed.
+         */
         constructor(fileName) {
             super();
             if (fileName != undefined) {

package/dist/Corpus/DisambiguationCorpus.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"DisambiguationCorpus.js","sourceRoot":"","sources":["../../source/Corpus/DisambiguationCorpus.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,0DAAqD;IACrD,yBAAyB;IACzB,2DAAsD;IACtD,oFAA+E;IAC/E,8DAAyD;IAEzD,MAAa,oBAAqB,SAAQ,eAAM;QAE5C,YAAY,QAAiB;YACzB,KAAK,EAAE,CAAC;YACR,IAAI,QAAQ,IAAI,SAAS,EAAC;gBACtB,IAAI,WAAW,GAAG,SAAS,CAAC;gBAC5B,IAAI,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;gBAC5C,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;gBAC5B,KAAK,IAAI,IAAI,IAAI,KAAK,EAAE;oBACpB,IAAI,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;oBACjD,IAAI,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBACnD,IAAI,IAAI,IAAI,EAAE,IAAI,KAAK,IAAI,EAAE,EAAE;wBAC3B,IAAI,OAAO,GAAG,IAAI,qCAAiB,CAAC,IAAI,EAAE,IAAI,uCAAkB,CAAC,KAAK,CAAC,CAAC,CAAC;wBACzE,IAAI,IAAI,IAAI,KAAK,EAAE;4BACf,WAAW,GAAG,IAAI,mBAAQ,EAAE,CAAC;yBAChC;6BAAM;4BACH,IAAI,IAAI,IAAI,MAAM,EAAE;gCAChB,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;6BACjC;iCAAM;gCACH,IAAI,IAAI,IAAI,OAAO,IAAI,IAAI,IAAI,QAAQ,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI,IAAI,UAAU,EAAE;iCACnF;qCAAM;oCACH,IAAI,WAAW,IAAI,IAAI,EAAE;wCACrB,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;qCAChC;iCACJ;6BACJ;yBACJ;qBACJ;iBACJ;aACJ;QACL,CAAC;KAEJ;~~IAhCD~~,~~oDAgCC~~"}
1	+ {"version":3,"file":"DisambiguationCorpus.js","sourceRoot":"","sources":["../../source/Corpus/DisambiguationCorpus.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,0DAAqD;IACrD,yBAAyB;IACzB,2DAAsD;IACtD,oFAA+E;IAC/E,8DAAyD;IAEzD,MAAa,oBAAqB,SAAQ,eAAM;QAE5C;;;;;;WAMG;QACH,YAAY,QAAiB;YACzB,KAAK,EAAE,CAAC;YACR,IAAI,QAAQ,IAAI,SAAS,EAAC;gBACtB,IAAI,WAAW,GAAG,SAAS,CAAC;gBAC5B,IAAI,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;gBAC5C,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;gBAC5B,KAAK,IAAI,IAAI,IAAI,KAAK,EAAE;oBACpB,IAAI,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;oBACjD,IAAI,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBACnD,IAAI,IAAI,IAAI,EAAE,IAAI,KAAK,IAAI,EAAE,EAAE;wBAC3B,IAAI,OAAO,GAAG,IAAI,qCAAiB,CAAC,IAAI,EAAE,IAAI,uCAAkB,CAAC,KAAK,CAAC,CAAC,CAAC;wBACzE,IAAI,IAAI,IAAI,KAAK,EAAE;4BACf,WAAW,GAAG,IAAI,mBAAQ,EAAE,CAAC;yBAChC;6BAAM;4BACH,IAAI,IAAI,IAAI,MAAM,EAAE;gCAChB,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;6BACjC;iCAAM;gCACH,IAAI,IAAI,IAAI,OAAO,IAAI,IAAI,IAAI,QAAQ,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI,IAAI,UAAU,EAAE;iCACnF;qCAAM;oCACH,IAAI,WAAW,IAAI,IAAI,EAAE;wCACrB,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;qCAChC;iCACJ;6BACJ;yBACJ;qBACJ;iBACJ;aACJ;QACL,CAAC;KAEJ;IAvCD,oDAuCC"}

package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.d.ts CHANGED Viewed

@@ -10,10 +10,11 @@ export declare class FsmMorphologicalAnalyzer {
     private dictionaryTrie;
     private suffixTrie;
     private parsedSurfaceForms;
-    private finiteStateMachine;
+    private pronunciations;
+    private readonly finiteStateMachine;
     private static MAX_DISTANCE;
-    private dictionary;
-    private cache;
+    private readonly dictionary;
+    private readonly cache;
     private mostUsedPatterns;
     /**
      * Another constructor of FsmMorphologicalAnalyzer class. It generates a new TxtDictionary type dictionary from
@@ -24,9 +25,29 @@ export declare class FsmMorphologicalAnalyzer {
      * @param cacheSize  the size of the LRUCache.
      */
     constructor(fileName?: string, dictionaryFileNameOrDictionary?: any, cacheSize?: number);
+    /**
+     * Constructs and returns the reverse string of a given string.
+     * @param s String to be reversed.
+     * @return Reverse of a given string.
+     */
     private reverseString;
+    /**
+     * Constructs the suffix trie from the input file suffixes.txt. suffixes.txt contains the most frequent 6000
+     * suffixes that a verb or a noun can take. The suffix trie is a trie that stores these suffixes in reverse form,
+     * which can be then used to match a given word for its possible suffix content.
+     */
     private prepareSuffixTrie;
+    /**
+     * Reads the file for correct surface forms and their most frequent root forms, in other words, the surface forms
+     * which have at least one morphological analysis in Turkish.
+     * @param fileName Input file containing analyzable surface forms and their root forms.
+     */
     addParsedSurfaceForms(fileName: string): void;
+    /**
+     * Reads the file for foreign words and their pronunciations.
+     * @param fileName Input file containing foreign words and their pronunciations.
+     */
+    addPronunciations(fileName: string): void;
     /**
      * The getPossibleWords method takes {@link MorphologicalParse} and {@link MetamorphicParse} as input.
      * First it determines whether the given morphologicalParse is the root verb and whether it contains a verb tag.
@@ -319,6 +340,15 @@ export declare class FsmMorphologicalAnalyzer {
      * @return ArrayList type initialFsmParse which holds the analyses.
      */
     analysis(surfaceForm: string, isProper: boolean): Array<FsmParse>;
+    /**
+     * This method uses cache idea to speed up pattern matching in Fsm. mostUsedPatterns stores the compiled forms of
+     * the previously used patterns. When Fsm tries to match a string to a pattern, first we check if it exists in
+     * mostUsedPatterns. If it exists, we directly use the compiled pattern to match the string. Otherwise, new pattern
+     * is compiled and put in the mostUsedPatterns.
+     * @param expr Pattern to check
+     * @param value String to match the pattern
+     * @return True if the string matches the pattern, false otherwise.
+     */
     private patternMatches;
     /**
      * The isProperNoun method takes surfaceForm String as input and checks its each char whether they are in the range
@@ -335,6 +365,19 @@ export declare class FsmMorphologicalAnalyzer {
      * @return true if it is a code-like word, return false otherwise.
      */
     isCode(surfaceForm: string): boolean;
+    /**
+     * Identifies a possible new root word for a given surface form. It also adds the new root form to the dictionary
+     * for further usage. The method first searches the suffix trie for the reverse string of the surface form. This
+     * way, it can identify if the word has a suffix that is in the most frequently used suffix list. Since a word can
+     * have multiple possible suffixes, the method identifies the longest suffix and returns the substring of the
+     * surface form that does not contain the suffix. Let's say the word is 'googlelaştırdık'; it will identify 'dık' as
+     * a suffix and will return 'googlelaştır' as a possible root form. Another example is 'homelesslerimizle'; it
+     * will identify 'lerimizle' as suffix and will return 'homeless' as a possible root form. If the root word ends
+     * with 'ğ', it is replaced with 'k'. 'morfolojikliğini' will yield 'morfolojikliğ', which will then be replaced
+     * with 'morfolojiklik'.
+     * @param surfaceForm Surface form for which we will identify a possible new root form.
+     * @return Possible new root form.
+     */
     private rootOfPossiblyNewWord;
     /**
      * The robustMorphologicalAnalysis is used to analyse surfaceForm String. First it gets the currentParse of the surfaceForm
@@ -383,9 +426,30 @@ export declare class FsmMorphologicalAnalyzer {
      * @return true if surfaceForm matches with the regex.
      */
     private isNumber;
+    /**
+     * Checks if a given surface form matches to a percent value. It should be something like %4, %45, %4.3 or %56.786
+     * @param surfaceForm Surface form to be checked.
+     * @return True if the surface form is in percent form
+     */
     private isPercent;
+    /**
+     * Checks if a given surface form matches to a time form. It should be something like 3:34, 12:56 etc.
+     * @param surfaceForm Surface form to be checked.
+     * @return True if the surface form is in time form
+     */
     private isTime;
+    /**
+     * Checks if a given surface form matches to a range form. It should be something like 123-1400 or 12:34-15:78 or
+     * 3.45-4.67.
+     * @param surfaceForm Surface form to be checked.
+     * @return True if the surface form is in range form
+     */
     private isRange;
+    /**
+     * Checks if a given surface form matches to a date form. It should be something like 3/10/2023 or 2.3.2012
+     * @param surfaceForm Surface form to be checked.
+     * @return True if the surface form is in date form
+     */
     private isDate;
     /**
      * The morphologicalAnalysis method is used to analyse a FsmParseList by comparing with the regex.

package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js CHANGED Viewed

@@ -4,7 +4,7 @@
         if (v !== undefined) module.exports = v;
     }
     else if (typeof define === "function" && define.amd) {
-        define(["require", "exports", "nlptoolkit-dictionary/dist/Dictionary/Trie/Trie", "./FiniteStateMachine", "nlptoolkit-dictionary/dist/Dictionary/TxtDictionary", "nlptoolkit-datastructure/dist/LRUCache", "./FsmParseList", "nlptoolkit-dictionary/dist/Dictionary/WordComparator", "fs", "./Transition", "./MorphologicalTag", "nlptoolkit-dictionary/dist/Dictionary/TxtWord", "./FsmParse", "nlptoolkit-corpus/dist/Sentence", "nlptoolkit-dictionary/dist/Dictionary/Word", "./State", "nlptoolkit-datastructure/dist/Queue"], factory);
+        define(["require", "exports", "nlptoolkit-dictionary/dist/Dictionary/Trie/Trie", "./FiniteStateMachine", "nlptoolkit-dictionary/dist/Dictionary/TxtDictionary", "nlptoolkit-datastructure/dist/LRUCache", "./FsmParseList", "nlptoolkit-dictionary/dist/Dictionary/WordComparator", "fs", "./Transition", "./MorphologicalTag", "nlptoolkit-dictionary/dist/Dictionary/TxtWord", "./FsmParse", "nlptoolkit-corpus/dist/Sentence", "nlptoolkit-dictionary/dist/Dictionary/Word", "./State", "nlptoolkit-datastructure/dist/Queue", "nlptoolkit-util/dist/FileUtils"], factory);
     }
 })(function (require, exports) {
     "use strict";
@@ -25,6 +25,7 @@
     const Word_1 = require("nlptoolkit-dictionary/dist/Dictionary/Word");
     const State_1 = require("./State");
     const Queue_1 = require("nlptoolkit-datastructure/dist/Queue");
+    const FileUtils_1 = require("nlptoolkit-util/dist/FileUtils");
     class FsmMorphologicalAnalyzer {
         /**
          * Another constructor of FsmMorphologicalAnalyzer class. It generates a new TxtDictionary type dictionary from
@@ -36,6 +37,7 @@
          */
         constructor(fileName, dictionaryFileNameOrDictionary, cacheSize) {
             this.parsedSurfaceForms = undefined;
+            this.pronunciations = undefined;
             this.cache = undefined;
             this.mostUsedPatterns = new Map();
             if (dictionaryFileNameOrDictionary == undefined) {
@@ -60,7 +62,13 @@
             if (cacheSize > 0) {
                 this.cache = new LRUCache_1.LRUCache(cacheSize);
             }
+            this.addPronunciations("pronunciations.txt");
         }
+        /**
+         * Constructs and returns the reverse string of a given string.
+         * @param s String to be reversed.
+         * @return Reverse of a given string.
+         */
         reverseString(s) {
             let result = "";
             for (let i = s.length - 1; i >= 0; i--) {
@@ -68,6 +76,11 @@
             }
             return result;
         }
+        /**
+         * Constructs the suffix trie from the input file suffixes.txt. suffixes.txt contains the most frequent 6000
+         * suffixes that a verb or a noun can take. The suffix trie is a trie that stores these suffixes in reverse form,
+         * which can be then used to match a given word for its possible suffix content.
+         */
         prepareSuffixTrie() {
             this.suffixTrie = new Trie_1.Trie();
             let data = fs.readFileSync("suffixes.txt", 'utf8');
@@ -77,14 +90,20 @@
                 this.suffixTrie.addWord(reverseSuffix, new Word_1.Word(reverseSuffix));
             }
         }
+        /**
+         * Reads the file for correct surface forms and their most frequent root forms, in other words, the surface forms
+         * which have at least one morphological analysis in Turkish.
+         * @param fileName Input file containing analyzable surface forms and their root forms.
+         */
         addParsedSurfaceForms(fileName) {
-            this.parsedSurfaceForms = new Map();
-            let data = fs.readFileSync(fileName, 'utf8');
-            let lines = data.split("\n");
-            for (let line of lines) {
-                let items = line.split(" ");
-                this.parsedSurfaceForms.set(items[0], items[1]);
-            }
+            this.parsedSurfaceForms = FileUtils_1.FileUtils.readHashMap(fileName);
+        }
+        /**
+         * Reads the file for foreign words and their pronunciations.
+         * @param fileName Input file containing foreign words and their pronunciations.
+         */
+        addPronunciations(fileName) {
+            this.pronunciations = FileUtils_1.FileUtils.readHashMap(fileName);
         }
         /**
          * The getPossibleWords method takes {@link MorphologicalParse} and {@link MetamorphicParse} as input.
@@ -677,7 +696,7 @@
          */
         parseWordLength(fsmParse, maxLength) {
             let result = new Array();
-            let resultSuffixList = new Array();
+            let resultTransitionList = new Array();
             let parseQueue = new Queue_1.Queue(1000);
             parseQueue.enqueueAll(fsmParse);
             while (!parseQueue.isEmpty()) {
@@ -687,11 +706,11 @@
                 let currentState = currentFsmParse.getFinalSuffix();
                 let currentSurfaceForm = currentFsmParse.getSurfaceForm();
                 if (currentState.isEndState() && currentSurfaceForm.length <= maxLength) {
-                    let currentSuffixList = currentFsmParse.getSuffixList();
-                    if (!resultSuffixList.includes(currentSuffixList)) {
+                    let currentTransitionList = currentSurfaceForm + " " + currentFsmParse.getFsmParseTransitionList();
+                    if (!resultTransitionList.includes(currentTransitionList)) {
                         result.push(currentFsmParse);
                         currentFsmParse.constructInflectionalGroups();
-                        resultSuffixList.push(currentSuffixList);
+                        resultTransitionList.push(currentTransitionList);
                     }
                 }
                 this.addNewParsesFromCurrentParseLength(currentFsmParse, parseQueue, maxLength, root);
@@ -708,7 +727,7 @@
          */
         parseWordSurfaceForm(fsmParse, surfaceForm) {
             let result = new Array();
-            let resultSuffixList = new Array();
+            let resultTransitionList = new Array();
             let parseQueue = new Queue_1.Queue(1000);
             parseQueue.enqueueAll(fsmParse);
             while (!parseQueue.isEmpty()) {
@@ -718,11 +737,11 @@
                 let currentState = currentFsmParse.getFinalSuffix();
                 let currentSurfaceForm = currentFsmParse.getSurfaceForm();
                 if (currentState.isEndState() && currentSurfaceForm == surfaceForm) {
-                    let currentSuffixList = currentFsmParse.getSuffixList();
-                    if (!resultSuffixList.includes(currentSuffixList)) {
+                    let currentTransitionList = currentFsmParse.getFsmParseTransitionList();
+                    if (!resultTransitionList.includes(currentTransitionList)) {
                         result.push(currentFsmParse);
                         currentFsmParse.constructInflectionalGroups();
-                        resultSuffixList.push(currentSuffixList);
+                        resultTransitionList.push(currentTransitionList);
                     }
                 }
                 this.addNewParsesFromCurrentParseSurfaceForm(currentFsmParse, parseQueue, surfaceForm, root);
@@ -986,6 +1005,15 @@
             initialFsmParse = this.initializeParseListFromSurfaceForm(surfaceForm, isProper);
             return this.parseWordSurfaceForm(initialFsmParse, surfaceForm);
         }
+        /**
+         * This method uses cache idea to speed up pattern matching in Fsm. mostUsedPatterns stores the compiled forms of
+         * the previously used patterns. When Fsm tries to match a string to a pattern, first we check if it exists in
+         * mostUsedPatterns. If it exists, we directly use the compiled pattern to match the string. Otherwise, new pattern
+         * is compiled and put in the mostUsedPatterns.
+         * @param expr Pattern to check
+         * @param value String to match the pattern
+         * @return True if the string matches the pattern, false otherwise.
+         */
         patternMatches(expr, value) {
             let p = this.mostUsedPatterns.get(expr);
             if (p == undefined) {
@@ -1021,31 +1049,39 @@
             }
             return this.patternMatches("^.*[0-9].*$", surfaceForm) && this.patternMatches("^.*[a-zA-ZçöğüşıÇÖĞÜŞİ].*$", surfaceForm);
         }
+        /**
+         * Identifies a possible new root word for a given surface form. It also adds the new root form to the dictionary
+         * for further usage. The method first searches the suffix trie for the reverse string of the surface form. This
+         * way, it can identify if the word has a suffix that is in the most frequently used suffix list. Since a word can
+         * have multiple possible suffixes, the method identifies the longest suffix and returns the substring of the
+         * surface form that does not contain the suffix. Let's say the word is 'googlelaştırdık'; it will identify 'dık' as
+         * a suffix and will return 'googlelaştır' as a possible root form. Another example is 'homelesslerimizle'; it
+         * will identify 'lerimizle' as suffix and will return 'homeless' as a possible root form. If the root word ends
+         * with 'ğ', it is replaced with 'k'. 'morfolojikliğini' will yield 'morfolojikliğ', which will then be replaced
+         * with 'morfolojiklik'.
+         * @param surfaceForm Surface form for which we will identify a possible new root form.
+         * @return Possible new root form.
+         */
         rootOfPossiblyNewWord(surfaceForm) {
             let words = this.suffixTrie.getWordsWithPrefix(this.reverseString(surfaceForm));
-            let maxLength = 0;
-            let longestWord = null;
+            let candidateWord = null;
+            let candidateList = new Array();
             for (let word of words) {
-                if (word.getName().length > maxLength) {
-                    longestWord = surfaceForm.substring(0, surfaceForm.length - word.getName().length);
-                    maxLength = word.getName().length;
-                }
-            }
-            if (maxLength != 0) {
+                candidateWord = surfaceForm.substring(0, surfaceForm.length - word.getName().length);
                 let newWord;
-                if (longestWord.endsWith("ğ")) {
-                    longestWord = longestWord.substring(0, longestWord.length - 1) + "k";
-                    newWord = new TxtWord_1.TxtWord(longestWord, "CL_ISIM");
+                if (candidateWord.endsWith("ğ")) {
+                    candidateWord = candidateWord.substring(0, candidateWord.length - 1) + "k";
+                    newWord = new TxtWord_1.TxtWord(candidateWord, "CL_ISIM");
                     newWord.addFlag("IS_SD");
                 }
                 else {
-                    newWord = new TxtWord_1.TxtWord(longestWord, "CL_ISIM");
+                    newWord = new TxtWord_1.TxtWord(candidateWord, "CL_ISIM");
                     newWord.addFlag("CL_FIIL");
                 }
-                this.dictionaryTrie.addWord(longestWord, newWord);
-                return newWord;
+                candidateList.push(newWord);
+                this.dictionaryTrie.addWord(candidateWord, newWord);
             }
-            return null;
+            return candidateList;
         }
         /**
          * The robustMorphologicalAnalysis is used to analyse surfaceForm String. First it gets the currentParse of the surfaceForm
@@ -1066,21 +1102,17 @@
                 if (this.isProperNoun(surfaceForm)) {
                     fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("ProperRoot")));
                 }
-                else {
-                    if (this.isCode(surfaceForm)) {
-                        fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("CodeRoot")));
-                    }
-                    else {
-                        let newRoot = this.rootOfPossiblyNewWord(surfaceForm);
-                        if (newRoot != null) {
-                            fsmParse.push(new FsmParse_1.FsmParse(newRoot, this.finiteStateMachine.getState("VerbalRoot")));
-                            fsmParse.push(new FsmParse_1.FsmParse(newRoot, this.finiteStateMachine.getState("NominalRoot")));
-                        }
-                        else {
-                            fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("NominalRoot")));
-                        }
+                if (this.isCode(surfaceForm)) {
+                    fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("CodeRoot")));
+                }
+                let newCandidateList = this.rootOfPossiblyNewWord(surfaceForm);
+                if (newCandidateList.length != 0) {
+                    for (let word of newCandidateList) {
+                        fsmParse.push(new FsmParse_1.FsmParse(word, this.finiteStateMachine.getState("VerbalRoot")));
+                        fsmParse.push(new FsmParse_1.FsmParse(word, this.finiteStateMachine.getState("NominalRoot")));
                     }
                 }
+                fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("NominalRoot")));
                 return new FsmParseList_1.FsmParseList(this.parseWordSurfaceForm(fsmParse, surfaceForm));
             }
             else {
@@ -1186,19 +1218,40 @@
             }
             return word == "" && count > 1;
         }
+        /**
+         * Checks if a given surface form matches to a percent value. It should be something like %4, %45, %4.3 or %56.786
+         * @param surfaceForm Surface form to be checked.
+         * @return True if the surface form is in percent form
+         */
         isPercent(surfaceForm) {
             return this.patternMatches("^%(\\d\\d|\\d)$", surfaceForm) ||
                 this.patternMatches("^%(\\d\\d|\\d)\\.\\d+$", surfaceForm);
         }
+        /**
+         * Checks if a given surface form matches to a time form. It should be something like 3:34, 12:56 etc.
+         * @param surfaceForm Surface form to be checked.
+         * @return True if the surface form is in time form
+         */
         isTime(surfaceForm) {
             return this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm) ||
                 this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm);
         }
+        /**
+         * Checks if a given surface form matches to a range form. It should be something like 123-1400 or 12:34-15:78 or
+         * 3.45-4.67.
+         * @param surfaceForm Surface form to be checked.
+         * @return True if the surface form is in range form
+         */
         isRange(surfaceForm) {
             return this.patternMatches("^\\d+-\\d+$", surfaceForm) ||
                 this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d)-(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm) ||
                 this.patternMatches("^(\\d\\d|\\d)\\.(\\d\\d|\\d)-(\\d\\d|\\d)\\.(\\d\\d|\\d)$", surfaceForm);
         }
+        /**
+         * Checks if a given surface form matches to a date form. It should be something like 3/10/2023 or 2.3.2012
+         * @param surfaceForm Surface form to be checked.
+         * @return True if the surface form is in date form
+         */
         isDate(surfaceForm) {
             return this.patternMatches("^(\\d\\d|\\d)/(\\d\\d|\\d)/\\d+$", surfaceForm) ||
                 this.patternMatches("^(\\d\\d|\\d)\\.(\\d\\d|\\d)\\.\\d+$", surfaceForm);
@@ -1222,6 +1275,8 @@
          */
         morphologicalAnalysis(surfaceForm) {
             let lowerCased = surfaceForm.toLocaleLowerCase("tr");
+            let possibleRootLowerCased = "", pronunciation = "";
+            let isRootReplaced = false;
             if (this.parsedSurfaceForms != undefined && this.parsedSurfaceForms.has(lowerCased) &&
                 !this.isInteger(surfaceForm) && !this.isDouble(surfaceForm) && !this.isPercent(surfaceForm) &&
                 !this.isTime(surfaceForm) && !this.isRange(surfaceForm) && !this.isDate(surfaceForm)) {
@@ -1233,9 +1288,9 @@
                 return this.cache.get(surfaceForm);
             }
             if (this.patternMatches("^(\\w|Ç|Ş|İ|Ü|Ö)\\.$", surfaceForm)) {
-                this.dictionaryTrie.addWord(surfaceForm.toLocaleLowerCase("tr"), new TxtWord_1.TxtWord(surfaceForm.toLocaleLowerCase("tr"), "IS_OA"));
+                this.dictionaryTrie.addWord(lowerCased, new TxtWord_1.TxtWord(lowerCased, "IS_OA"));
             }
-            let defaultFsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
+            let defaultFsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
             if (defaultFsmParse.length > 0) {
                 let fsmParseList = new FsmParseList_1.FsmParseList(defaultFsmParse);
                 if (this.cache != undefined) {
@@ -1249,57 +1304,69 @@
                 if (possibleRoot != "") {
                     if (possibleRoot.includes("/") || possibleRoot.includes("\\/")) {
                         this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_KESIR"));
-                        fsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
+                        fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
                     }
                     else {
                         if (this.isDate(possibleRoot)) {
                             this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_DATE"));
-                            fsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
+                            fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
                         }
                         else {
                             if (this.patternMatches("^\\d+/\\d+$", possibleRoot)) {
                                 this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_KESIR"));
-                                fsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
+                                fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
                             }
                             else {
                                 if (this.isPercent(possibleRoot)) {
                                     this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_PERCENT"));
-                                    fsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
+                                    fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
                                 }
                                 else {
                                     if (this.isTime(surfaceForm)) {
                                         this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_ZAMAN"));
-                                        fsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
+                                        fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
                                     }
                                     else {
                                         if (this.isRange(surfaceForm)) {
                                             this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_RANGE"));
-                                            fsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
+                                            fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
                                         }
                                         else {
                                             if (this.isInteger(possibleRoot)) {
                                                 this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_SAYI"));
-                                                fsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
+                                                fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
                                             }
                                             else {
                                                 if (this.isDouble(possibleRoot)) {
                                                     this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_REELSAYI"));
-                                                    fsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
+                                                    fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
                                                 }
                                                 else {
-                                                    if (Word_1.Word.isCapital(possibleRoot)) {
+                                                    if (Word_1.Word.isCapital(possibleRoot) || "QXW".includes(possibleRoot.substring(0, 1))) {
                                                         let newWord = undefined;
-                                                        if (this.dictionary.getWord(possibleRoot.toLocaleLowerCase("tr")) != null) {
-                                                            this.dictionary.getWord(possibleRoot.toLocaleLowerCase("tr")).addFlag("IS_OA");
+                                                        possibleRootLowerCased = possibleRoot.toLocaleLowerCase("tr");
+                                                        if (this.pronunciations.has(possibleRootLowerCased)) {
+                                                            isRootReplaced = true;
+                                                            pronunciation = this.pronunciations.get(possibleRootLowerCased);
+                                                            if (this.dictionary.getWord(pronunciation) != null) {
+                                                                this.dictionary.getWord(pronunciation).addFlag("IS_OA");
+                                                            }
+                                                            else {
+                                                                newWord = new TxtWord_1.TxtWord(pronunciation, "IS_OA");
+                                                                this.dictionaryTrie.addWord(pronunciation, newWord);
+                                                            }
+                                                            let replacedWord = pronunciation + lowerCased.substring(possibleRootLowerCased.length);
+                                                            fsmParse = this.analysis(replacedWord, this.isProperNoun(surfaceForm));
                                                         }
                                                         else {
-                                                            newWord = new TxtWord_1.TxtWord(possibleRoot.toLocaleLowerCase("tr"), "IS_OA");
-                                                            this.dictionaryTrie.addWord(possibleRoot.toLocaleLowerCase("tr"), newWord);
-                                                        }
-                                                        fsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
-                                                        if (fsmParse.length == 0 && newWord != undefined) {
-                                                            newWord.addFlag("IS_KIS");
-                                                            fsmParse = this.analysis(surfaceForm.toLocaleLowerCase("tr"), this.isProperNoun(surfaceForm));
+                                                            if (this.dictionary.getWord(possibleRootLowerCased) != null) {
+                                                                this.dictionary.getWord(possibleRootLowerCased).addFlag("IS_OA");
+                                                            }
+                                                            else {
+                                                                newWord = new TxtWord_1.TxtWord(possibleRootLowerCased, "IS_OA");
+                                                                this.dictionaryTrie.addWord(possibleRootLowerCased, newWord);
+                                                            }
+                                                            fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
                                                         }
                                                     }
                                                 }
@@ -1312,6 +1379,11 @@
                     }
                 }
             }
+            if (!isRootReplaced) {
+                for (let parse of fsmParse) {
+                    parse.restoreOriginalForm(possibleRootLowerCased, pronunciation);
+                }
+            }
             let fsmParseList = new FsmParseList_1.FsmParseList(fsmParse);
             if (this.cache != undefined && fsmParseList.size() > 0) {
                 this.cache.add(surfaceForm, fsmParseList);