npm - nlptoolkit-morphologicalanalysis - Versions diffs - 1.0.0 - Mend

nlptoolkit-morphologicalanalysis 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/README.md +144 -0
package/dist/Corpus/DisambiguatedWord.d.ts +20 -0
package/dist/Corpus/DisambiguatedWord.js +38 -0
package/dist/Corpus/DisambiguatedWord.js.map +1 -0
package/dist/Corpus/DisambiguationCorpus.d.ts +4 -0
package/dist/Corpus/DisambiguationCorpus.js +54 -0
package/dist/Corpus/DisambiguationCorpus.js.map +1 -0
package/dist/MorphologicalAnalysis/FiniteStateMachine.d.ts +63 -0
package/dist/MorphologicalAnalysis/FiniteStateMachine.js +178 -0
package/dist/MorphologicalAnalysis/FiniteStateMachine.js.map +1 -0
package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.d.ts +399 -0
package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js +1255 -0
package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js.map +1 -0
package/dist/MorphologicalAnalysis/FsmParse.d.ts +290 -0
package/dist/MorphologicalAnalysis/FsmParse.js +684 -0
package/dist/MorphologicalAnalysis/FsmParse.js.map +1 -0
package/dist/MorphologicalAnalysis/FsmParseList.d.ts +96 -0
package/dist/MorphologicalAnalysis/FsmParseList.js +242 -0
package/dist/MorphologicalAnalysis/FsmParseList.js.map +1 -0
package/dist/MorphologicalAnalysis/InflectionalGroup.d.ts +77 -0
package/dist/MorphologicalAnalysis/InflectionalGroup.js +213 -0
package/dist/MorphologicalAnalysis/InflectionalGroup.js.map +1 -0
package/dist/MorphologicalAnalysis/MetamorphicParse.d.ts +63 -0
package/dist/MorphologicalAnalysis/MetamorphicParse.js +592 -0
package/dist/MorphologicalAnalysis/MetamorphicParse.js.map +1 -0
package/dist/MorphologicalAnalysis/MorphologicalParse.d.ts +301 -0
package/dist/MorphologicalAnalysis/MorphologicalParse.js +969 -0
package/dist/MorphologicalAnalysis/MorphologicalParse.js.map +1 -0
package/dist/MorphologicalAnalysis/MorphologicalTag.d.ts +510 -0
package/dist/MorphologicalAnalysis/MorphologicalTag.js +525 -0
package/dist/MorphologicalAnalysis/MorphologicalTag.js.map +1 -0
package/dist/MorphologicalAnalysis/State.d.ts +40 -0
package/dist/MorphologicalAnalysis/State.js +64 -0
package/dist/MorphologicalAnalysis/State.js.map +1 -0
package/dist/MorphologicalAnalysis/Transition.d.ts +159 -0
package/dist/MorphologicalAnalysis/Transition.js +751 -0
package/dist/MorphologicalAnalysis/Transition.js.map +1 -0
package/index.js +12 -0
package/package.json +30 -0
package/penntreebank.txt +208431 -0
package/source/Corpus/DisambiguatedWord.ts +29 -0
package/source/Corpus/DisambiguationCorpus.ts +39 -0
package/source/MorphologicalAnalysis/FiniteStateMachine.ts +165 -0
package/source/MorphologicalAnalysis/FsmMorphologicalAnalyzer.ts +1256 -0
package/source/MorphologicalAnalysis/FsmParse.ts +664 -0
package/source/MorphologicalAnalysis/FsmParseList.ts +238 -0
package/source/MorphologicalAnalysis/InflectionalGroup.ts +210 -0
package/source/MorphologicalAnalysis/MetamorphicParse.ts +589 -0
package/source/MorphologicalAnalysis/MorphologicalParse.ts +995 -0
package/source/MorphologicalAnalysis/MorphologicalTag.ts +510 -0
package/source/MorphologicalAnalysis/State.ts +59 -0
package/source/MorphologicalAnalysis/Transition.ts +733 -0
package/source/tsconfig.json +13 -0
package/tests/DisambiguationCorpusTest.ts +12 -0
package/tests/FiniteStateMachineTest.ts +87 -0
package/tests/FsmMorphologicalAnalyzerTest.ts +204 -0
package/tests/FsmParseListTest.ts +90 -0
package/tests/FsmParseTest.ts +66 -0
package/tests/InflectionalGroupTest.ts +84 -0
package/tests/MorphologicalParseTest.ts +152 -0
package/tests/TransitionTest.ts +174 -0
package/tsconfig.json +15 -0
package/turkish_dictionary.txt +62120 -0
package/turkish_finite_state_machine.xml +1887 -0
package/turkish_misspellings.txt +148932 -0

package/README.md ADDED Viewed

@@ -0,0 +1,144 @@
+Morphological Analysis
+============
+## Morphology
+In linguistics, the term morphology refers to the study of the internal structure of words. Each word is assumed to consist of one or more morphemes, which can be defined as the smallest linguistic unit having a particular meaning or grammatical function. One can come across morphologically simplex words, i.e. roots, as well as morphologically complex ones, such as compounds or affixed forms.
+Batı-lı-laş-tır-ıl-ama-yan-lar-dan-mış-ız
+west-With-Make-Caus-Pass-Neg.Abil-Nom-Pl-Abl-Evid-A3Pl
+‘It appears that we are among the ones that cannot be westernized.’
+The morphemes that constitute a word combine in a (more or less) strict order. Most morphologically complex words are in the “ROOT-SUFFIX1-SUFFIX2-...” structure. Affixes are of two types: (i) derivational affixes, which change the meaning and sometimes also the grammatical category of the base they are attached to, and (ii) inflectional affixes serving particular grammatical functions. In general, derivational suffixes precede inflectional ones. The order of derivational suffixes is reflected in the meaning of the derived form. For instance, consider the combination of the noun göz ‘eye’ with two derivational suffixes -lIK and -CI: Even though the same three morphemes are used, the meaning of a word like gözcülük ‘scouting’ is clearly different from that of gözlükçü ‘optician’.
+## Dilbaz
+Here we present a new morphological analyzer, which is (i) open: The latest version of source codes, the lexicon, and the morphotactic rule engine are all available here, (ii) extendible: One of the disadvantages of other morphological analyzers is that their lexicons are fixed or unmodifiable, which prevents adding new bare-forms to the morphological analyzer. In our morphological analyzer, the lexicon is in text form and is easily modifiable, (iii) fast: Morphological analysis is one of the core components of any NLP process. It must be very fast to handle huge corpora. Compared to other morphological analyzers, our analyzer is capable of analyzing hundreds of thousands of words per second, which makes it one of the fastest Turkish morphological analyzers available.
+The morphological analyzer consists of five main components, namely, a lexicon, a finite state transducer, a rule engine for suffixation, a trie data structure, and a least recently used (LRU) cache.
+In this analyzer, we assume all idiosyncratic information to be encoded in the lexicon. While phonologically conditioned allomorphy will be dealt with by the transducer, other types of allomorphy, all exceptional forms to otherwise regular processes, as well as words formed through derivation (except for the few transparently compositional derivational suffixes) are considered to be included in the lexicon.
+In our morphological analyzer, the finite state transducer is encoded in an XML file.
+To overcome the irregularities and also to accelerate the search for the bareforms, we use a trie data structure in our morphological analyzer, and store all words in our lexicon in that data structure. For the regular words, we only store that word in our trie, whereas for irregular words we store both the original form and some prefix of that word.
+For Developers
+============
+You can also see [Python](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Py),
+[Java](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis), [C++](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-CPP),
+[Swift](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Swift), [Cython](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Cy),
+or [C#](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-CS) repository.
+Detailed Description
+============
++ [Creating FsmMorphologicalAnalyzer](#creating-fsmmorphologicalanalyzer)
++ [Word level morphological analysis](#word-level-morphological-analysis)
++ [Sentence level morphological analysis](#sentence-level-morphological-analysis)
+## Creating FsmMorphologicalAnalyzer
+FsmMorphologicalAnalyzer provides Turkish morphological analysis. This class can be created as follows:
+    let fsm = new FsmMorphologicalAnalyzer();
+This generates a new `TxtDictionary` type dictionary from [`turkish_dictionary.txt`](https://github.com/olcaytaner/Dictionary/tree/master/src/main/resources) with fixed cache size 100000 and by using [`turkish_finite_state_machine.xml`](https://github.com/olcaytaner/MorphologicalAnalysis/tree/master/src/main/resources).
+Creating a morphological analyzer with different cache size, dictionary or finite state machine is also possible.
+* With different cache size,
+        let fsm = new FsmMorphologicalAnalyzer(50000);
+* Using a different dictionary,
+        let fsm = new FsmMorphologicalAnalyzer("my_turkish_dictionary.txt");
+* Specifying both finite state machine and dictionary,
+        let fsm = new FsmMorphologicalAnalyzer("fsm.xml", "my_turkish_dictionary.txt") ;
+* Giving finite state machine and cache size with creating `TxtDictionary` object,
+        let dictionary = new TxtDictionary("my_turkish_dictionary.txt", WordComparator.TURKISH);
+        let fsm = new FsmMorphologicalAnalyzer("fsm.xml", dictionary, 50000) ;
+* With different finite state machine and creating `TxtDictionary` object,
+        let dictionary = new TxtDictionary("my_turkish_dictionary.txt", WordComparator.TURKISH, "my_turkish_misspelled.txt");
+        let fsm = new FsmMorphologicalAnalyzer("fsm.xml", dictionary);
+## Word level morphological analysis
+For morphological analysis, the `morphologicalAnalysis(String word)` method of `FsmMorphologicalAnalyzer` is used. This returns an `FsmParseList` object.
+    let fsm = new FsmMorphologicalAnalyzer();
+    let word = "yarına";
+    let fsmParseList = fsm.morphologicalAnalysis(word);
+    for (let i = 0; i < fsmParseList.size(); i++){
+      console.log(fsmParseList.getFsmParse(i).getTransitionList());
+    }
+Output
+    yar+NOUN+A3SG+P2SG+DAT
+    yar+NOUN+A3SG+P3SG+DAT
+    yarı+NOUN+A3SG+P2SG+DAT
+    yarın+NOUN+A3SG+PNON+DAT
+From `FsmParseList`, a single `FsmParse` can be obtained as follows:
+    let parse = fsmParseList.getFsmParse(0);
+    console.log(parse.getTransitionList());
+Output
+    yar+NOUN+A3SG+P2SG+DAT
+## Sentence level morphological analysis
+The `morphologicalAnalysis(Sentence sentence)` method of `FsmMorphologicalAnalyzer` is used. This returns an array of `FsmParseList` objects (`FsmParseList[]`).
+    let fsm = new FsmMorphologicalAnalyzer();
+    let sentence = new Sentence("Yarın doktora gidecekler");
+    let parseLists = fsm.morphologicalAnalysis(sentence);
+    for(let i = 0; i < parseLists.length; i++){
+        for(let j = 0; j < parseLists[i].size(); j++){
+            let parse = parseLists[i].getFsmParse(j);
+            console.log(parse.getTransitionList());
+        }
+        console.log("-----------------");
+    }
+Output
+    -----------------
+    yar+NOUN+A3SG+P2SG+NOM
+    yar+NOUN+A3SG+PNON+GEN
+    yar+VERB+POS+IMP+A2PL
+    yarı+NOUN+A3SG+P2SG+NOM
+    yarın+NOUN+A3SG+PNON+NOM
+    -----------------
+    doktor+NOUN+A3SG+PNON+DAT
+    doktora+NOUN+A3SG+PNON+NOM
+    -----------------
+    git+VERB+POS+FUT+A3PL
+    git+VERB+POS^DB+NOUN+FUTPART+A3PL+PNON+NOM
+# Cite
+	@inproceedings{yildiz-etal-2019-open,
+    	title = "An Open, Extendible, and Fast {T}urkish Morphological Analyzer",
+    	author = {Y{\i}ld{\i}z, Olcay Taner  and
+      	Avar, Beg{\"u}m  and
+      	Ercan, G{\"o}khan},
+    	booktitle = "Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)",
+    	month = sep,
+    	year = "2019",
+    	address = "Varna, Bulgaria",
+    	publisher = "INCOMA Ltd.",
+    	url = "https://www.aclweb.org/anthology/R19-1156",
+    	doi = "10.26615/978-954-452-056-4_156",
+    	pages = "1364--1372",
+	}

package/dist/Corpus/DisambiguatedWord.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import { Word } from "nlptoolkit-dictionary/dist/Dictionary/Word";
+import { MorphologicalParse } from "../MorphologicalAnalysis/MorphologicalParse";
+export declare class DisambiguatedWord extends Word {
+    private parse;
+    /**
+     * Constructs a {@link DisambiguatedWord} from a name and a {@link MorphologicalParse}. The name is passed
+     * on to the {@link Word} superclass constructor, and the given parse object is stored as-is (the same
+     * reference is kept; no copy is made).
+     *
+     * @param name  Name of the word, forwarded to the {@link Word} constructor.
+     * @param parse {@link MorphologicalParse} associated with this {@link DisambiguatedWord}.
+     */
+    constructor(name: string, parse: MorphologicalParse);
+    /**
+     * Accessor for the stored {@link MorphologicalParse}.
+     *
+     * @return the parse object that was given to the constructor.
+     */
+    getParse(): MorphologicalParse;
+}

package/dist/Corpus/DisambiguatedWord.js ADDED Viewed

@@ -0,0 +1,38 @@
+(function (factory) {
+    if (typeof module === "object" && typeof module.exports === "object") {
+        var v = factory(require, exports);
+        if (v !== undefined) module.exports = v;
+    }
+    else if (typeof define === "function" && define.amd) {
+        define(["require", "exports", "nlptoolkit-dictionary/dist/Dictionary/Word"], factory);
+    }
+})(function (require, exports) {
+    "use strict";
+    Object.defineProperty(exports, "__esModule", { value: true });
+    exports.DisambiguatedWord = void 0;
+    const Word_1 = require("nlptoolkit-dictionary/dist/Dictionary/Word");
+    class DisambiguatedWord extends Word_1.Word {
+        /**
+         * Constructs a {@link DisambiguatedWord} from a name and a {@link MorphologicalParse}. The name is passed
+         * on to the {@link Word} superclass constructor, and the given parse object is stored as-is (the same
+         * reference is kept; no copy is made).
+         *
+         * @param name  Name of the word, forwarded to the {@link Word} constructor.
+         * @param parse {@link MorphologicalParse} associated with this {@link DisambiguatedWord}.
+         */
+        constructor(name, parse) {
+            super(name);
+            this.parse = parse;
+        }
+        /**
+         * Accessor for the stored {@link MorphologicalParse}.
+         *
+         * @return the parse object that was given to the constructor.
+         */
+        getParse() {
+            return this.parse;
+        }
+    }
+    exports.DisambiguatedWord = DisambiguatedWord;
+});
+//# sourceMappingURL=DisambiguatedWord.js.map

package/dist/Corpus/DisambiguatedWord.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"DisambiguatedWord.js","sourceRoot":"","sources":["../../source/Corpus/DisambiguatedWord.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,qEAAgE;IAGhE,MAAa,iBAAkB,SAAQ,WAAI;QAIvC;;;;;;;WAOG;QACH,YAAY,IAAY,EAAE,KAAyB;YAC/C,KAAK,CAAC,IAAI,CAAC,CAAC;YACZ,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;QACtB,CAAC;QAED;;;;WAIG;QACH,QAAQ;YACJ,OAAO,IAAI,CAAC,KAAK,CAAA;QACrB,CAAC;KACJ;IAzBD,8CAyBC"}

package/dist/Corpus/DisambiguationCorpus.d.ts ADDED Viewed

@@ -0,0 +1,4 @@
+import { Corpus } from "nlptoolkit-corpus/dist/Corpus";
+export declare class DisambiguationCorpus extends Corpus { // Corpus loaded from a file of tab-separated word/parse lines.
+    constructor(fileName: string); // fileName: path of the corpus file to read.
+}

package/dist/Corpus/DisambiguationCorpus.js ADDED Viewed

@@ -0,0 +1,54 @@
+(function (factory) {
+    if (typeof module === "object" && typeof module.exports === "object") {
+        var v = factory(require, exports);
+        if (v !== undefined) module.exports = v;
+    }
+    else if (typeof define === "function" && define.amd) {
+        define(["require", "exports", "nlptoolkit-corpus/dist/Corpus", "fs", "./DisambiguatedWord", "../MorphologicalAnalysis/MorphologicalParse", "nlptoolkit-corpus/dist/Sentence"], factory);
+    }
+})(function (require, exports) {
+    "use strict";
+    Object.defineProperty(exports, "__esModule", { value: true });
+    exports.DisambiguationCorpus = void 0;
+    const Corpus_1 = require("nlptoolkit-corpus/dist/Corpus");
+    const fs = require("fs");
+    const DisambiguatedWord_1 = require("./DisambiguatedWord");
+    const MorphologicalParse_1 = require("../MorphologicalAnalysis/MorphologicalParse");
+    const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
+    class DisambiguationCorpus extends Corpus_1.Corpus { // Corpus read from a file of tab-separated "word<TAB>parse" lines.
+        constructor(fileName) { // fileName: path of the corpus file; when undefined, nothing is read.
+            super();
+            if (fileName != undefined) {
+                let newSentence = undefined; // Sentence currently being filled; assigned when "<S>" is seen.
+                let data = fs.readFileSync(fileName, 'utf8'); // The whole file is read synchronously as UTF-8.
+                let lines = data.split("\n"); // NOTE(review): with CRLF input a trailing "\r" would stay in parse - confirm files use "\n" only.
+                for (let line of lines) {
+                    let word = line.substring(0, line.indexOf("\t")); // Text before the first tab ("" when the line has no tab).
+                    let parse = line.substring(line.indexOf("\t") + 1); // Everything after the first tab.
+                    if (word != "" && parse != "") { // Lines with an empty word or an empty parse are skipped.
+                        let newWord = new DisambiguatedWord_1.DisambiguatedWord(word, new MorphologicalParse_1.MorphologicalParse(parse)); // Built for every kept line, markers included.
+                        if (word == "<S>") { // Sentence start marker: open a fresh sentence.
+                            newSentence = new Sentence_1.Sentence();
+                        }
+                        else {
+                            if (word == "</S>") { // Sentence end marker: add the current sentence to the corpus.
+                                this.addSentence(newSentence); // NOTE(review): newSentence is still undefined if "</S>" appears before any "<S>".
+                            }
+                            else {
+                                if (word == "<DOC>" || word == "</DOC>" || word == "<TITLE>" || word == "</TITLE>") { // Document and title markers are ignored.
+                                }
+                                else {
+                                    if (newSentence != null) { // Words seen before the first "<S>" are dropped.
+                                        newSentence.addWord(newWord);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    exports.DisambiguationCorpus = DisambiguationCorpus;
+});
+//# sourceMappingURL=DisambiguationCorpus.js.map

package/dist/Corpus/DisambiguationCorpus.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"DisambiguationCorpus.js","sourceRoot":"","sources":["../../source/Corpus/DisambiguationCorpus.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,0DAAqD;IACrD,yBAAyB;IACzB,2DAAsD;IACtD,oFAA+E;IAC/E,8DAAyD;IAEzD,MAAa,oBAAqB,SAAQ,eAAM;QAE5C,YAAY,QAAgB;YACxB,KAAK,EAAE,CAAC;YACR,IAAI,QAAQ,IAAI,SAAS,EAAC;gBACtB,IAAI,WAAW,GAAG,SAAS,CAAC;gBAC5B,IAAI,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;gBAC5C,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;gBAC5B,KAAK,IAAI,IAAI,IAAI,KAAK,EAAE;oBACpB,IAAI,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;oBACjD,IAAI,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBACnD,IAAI,IAAI,IAAI,EAAE,IAAI,KAAK,IAAI,EAAE,EAAE;wBAC3B,IAAI,OAAO,GAAG,IAAI,qCAAiB,CAAC,IAAI,EAAE,IAAI,uCAAkB,CAAC,KAAK,CAAC,CAAC,CAAC;wBACzE,IAAI,IAAI,IAAI,KAAK,EAAE;4BACf,WAAW,GAAG,IAAI,mBAAQ,EAAE,CAAC;yBAChC;6BAAM;4BACH,IAAI,IAAI,IAAI,MAAM,EAAE;gCAChB,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;6BACjC;iCAAM;gCACH,IAAI,IAAI,IAAI,OAAO,IAAI,IAAI,IAAI,QAAQ,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI,IAAI,UAAU,EAAE;iCACnF;qCAAM;oCACH,IAAI,WAAW,IAAI,IAAI,EAAE;wCACrB,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;qCAChC;iCACJ;6BACJ;yBACJ;qBACJ;iBACJ;aACJ;QACL,CAAC;KAEJ;IAhCD,oDAgCC"}

package/dist/MorphologicalAnalysis/FiniteStateMachine.d.ts ADDED Viewed

@@ -0,0 +1,63 @@
+import { State } from "./State";
+import { Transition } from "./Transition";
+export declare class FiniteStateMachine {
+    private states;
+    private transitions;
+    /**
+     * Builds the finite state machine from the XML file with the given name, which is parsed with XmlDocument.
+     * The children of the document's first child are treated as state nodes, and they are traversed twice.
+     * Nodes without attributes are skipped in both passes.
+     * <p>
+     * First pass: for every state node the 'name', 'start' and 'end' attributes are read, and a {@link State}
+     * is created and appended to the states array. A state is a start state when 'start' is 'yes' and an end
+     * state when 'end' is 'yes'. For start states the 'originalpos' attribute is also read and passed to the
+     * {@link State} constructor.
+     * <p>
+     * Second pass: each child of a state node is a transition node whose 'name' attribute names the target state;
+     * its 'transitionname' and 'topos' attributes serve as defaults. Each child of a transition node (a "with"
+     * node) adds one transition via addTransition, using the node's text and, if the node has attributes, its own
+     * 'name' and 'topos'. An empty 'topos' falls back to the transition node's 'topos'; if both are empty, none is passed.
+     *
+     * @param fileName name of the XML file describing the machine; defaults to "turkish_finite_state_machine.xml".
+     */
+    constructor(fileName?: string);
+    /**
+     * Checks whether any stored transition, over all source states in the transitions map, has a string form
+     * (its toString() result) that is defined and equal to the given string.
+     *
+     * @param transition string to compare with the toString() value of each stored transition.
+     * @return true if at least one stored transition matches, false otherwise.
+     */
+    isValidTransition(transition: string): boolean;
+    /**
+     * Returns the states of this FiniteStateMachine (the internal array itself, not a copy).
+     * @return array of {@link State}s.
+     */
+    getStates(): Array<State>;
+    /**
+     * Searches the states {@link Array} in order and returns the first state whose name equals
+     * the given name.
+     *
+     * @param name name of the state to look up.
+     * @return the matching state, or undefined if there is none.
+     */
+    getState(name: string): State;
+    /**
+     * Creates a new {@link Transition} from the given parameters and appends it to the array of transitions
+     * stored for fromState; the array is created the first time a state is used. toPos is optional.
+     *
+     * @param fromState  State the transition leaves; used as the key in the transitions map.
+     * @param toState  State type input indicating the next state.
+     * @param _with     String input indicating with what the transition will be made.
+     * @param withName String input passed on to the {@link Transition} constructor.
+     * @param toPos    Optional String input passed on to the {@link Transition} constructor.
+     */
+    addTransition(fromState: State, toState: State, _with: string, withName: string, toPos?: string): void;
+    /**
+     * The getTransitions method returns the transitions stored for the given state.
+     *
+     * @param state State input.
+     * @return the array stored for the state, or a new empty array if the state has no transitions.
+     */
+    getTransitions(state: State): Array<Transition>;
+}

package/dist/MorphologicalAnalysis/FiniteStateMachine.js ADDED Viewed

@@ -0,0 +1,178 @@
+(function (factory) {
+    if (typeof module === "object" && typeof module.exports === "object") {
+        var v = factory(require, exports);
+        if (v !== undefined) module.exports = v;
+    }
+    else if (typeof define === "function" && define.amd) {
+        define(["require", "exports", "./State", "./Transition", "nlptoolkit-xmlparser/dist/XmlDocument"], factory);
+    }
+})(function (require, exports) {
+    "use strict";
+    Object.defineProperty(exports, "__esModule", { value: true });
+    exports.FiniteStateMachine = void 0;
+    const State_1 = require("./State");
+    const Transition_1 = require("./Transition");
+    const XmlDocument_1 = require("nlptoolkit-xmlparser/dist/XmlDocument");
+    class FiniteStateMachine {
+        /**
+         * Builds the finite state machine from the XML file with the given name, which is parsed with XmlDocument.
+         * The children of the document's first child are treated as state nodes, and they are traversed twice.
+         * Nodes without attributes are skipped in both passes.
+         * <p>
+         * First pass: for every state node the 'name', 'start' and 'end' attributes are read, and a {@link State}
+         * is created and appended to the states array. A state is a start state when 'start' is 'yes' and an end
+         * state when 'end' is 'yes'. For start states the 'originalpos' attribute is also read and passed to the
+         * {@link State} constructor.
+         * <p>
+         * Second pass: each child of a state node is a transition node whose 'name' attribute names the target state;
+         * its 'transitionname' and 'topos' attributes serve as defaults. Each child of a transition node (a "with"
+         * node) adds one transition via addTransition, using the node's text and, if the node has attributes, its own
+         * 'name' and 'topos'. An empty 'topos' falls back to the transition node's 'topos'; if both are empty, none is passed.
+         *
+         * @param fileName name of the XML file describing the machine; defaults to "turkish_finite_state_machine.xml".
+         */
+        constructor(fileName = "turkish_finite_state_machine.xml") {
+            this.states = new Array();
+            this.transitions = new Map();
+            let xmlDocument = new XmlDocument_1.XmlDocument(fileName);
+            xmlDocument.parse();
+            let stateListNode = xmlDocument.getFirstChild();
+            let stateNode = stateListNode.getFirstChild();
+            while (stateNode != undefined) {
+                if (stateNode.hasAttributes()) {
+                    let stateName = stateNode.getAttributeValue("name");
+                    let startState = stateNode.getAttributeValue("start");
+                    let endState = stateNode.getAttributeValue("end");
+                    let state;
+                    if (startState == "yes") {
+                        let originalPos = stateNode.getAttributeValue("originalpos");
+                        state = new State_1.State(stateName, true, endState == "yes", originalPos);
+                    }
+                    else {
+                        state = new State_1.State(stateName, false, endState == "yes");
+                    }
+                    this.states.push(state);
+                }
+                stateNode = stateNode.getNextSibling();
+            }
+            stateNode = stateListNode.getFirstChild();
+            while (stateNode != undefined) {
+                if (stateNode.hasAttributes()) {
+                    let stateName = stateNode.getAttributeValue("name");
+                    let state = this.getState(stateName);
+                    let transitionNode = stateNode.getFirstChild();
+                    while (transitionNode != undefined) {
+                        if (transitionNode.hasAttributes()) {
+                            let toStateName = transitionNode.getAttributeValue("name");
+                            let toState = this.getState(toStateName);
+                            let withName = transitionNode.getAttributeValue("transitionname");
+                            let rootToPos = transitionNode.getAttributeValue("topos");
+                            let withNode = transitionNode.getFirstChild();
+                            while (withNode != undefined) {
+                                let toPos;
+                                if (withNode.hasAttributes()) {
+                                    withName = withNode.getAttributeValue("name");
+                                    toPos = withNode.getAttributeValue("topos");
+                                }
+                                else {
+                                    toPos = "";
+                                }
+                                if (toPos == "") {
+                                    if (rootToPos == "") {
+                                        this.addTransition(state, toState, withNode.getPcData(), withName);
+                                    }
+                                    else {
+                                        this.addTransition(state, toState, withNode.getPcData(), withName, rootToPos);
+                                    }
+                                }
+                                else {
+                                    this.addTransition(state, toState, withNode.getPcData(), withName, toPos);
+                                }
+                                withNode = withNode.getNextSibling();
+                            }
+                        }
+                        transitionNode = transitionNode.getNextSibling();
+                    }
+                }
+                stateNode = stateNode.getNextSibling();
+            }
+        }
+        /**
+         * Checks whether any stored transition, over all source states in the transitions map, has a string form
+         * (its toString() result) that is defined and equal to the given string.
+         *
+         * @param transition string to compare with the toString() value of each stored transition.
+         * @return true if at least one stored transition matches, false otherwise.
+         */
+        isValidTransition(transition) {
+            for (let state of this.transitions.keys()) {
+                for (let transition1 of this.transitions.get(state)) {
+                    if (transition1.toString() != undefined && transition1.toString() == transition) {
+                        return true;
+                    }
+                }
+            }
+            return false;
+        }
+        /**
+         * Returns the states of this FiniteStateMachine (the internal array itself, not a copy).
+         * @return array of {@link State}s.
+         */
+        getStates() {
+            return this.states;
+        }
+        /**
+         * Searches the states {@link Array} in order and returns the first state whose name equals
+         * the given name.
+         *
+         * @param name name of the state to look up.
+         * @return the matching state, or undefined if there is none.
+         */
+        getState(name) {
+            for (let state of this.states) {
+                if (state.getName() == name) {
+                    return state;
+                }
+            }
+            return undefined;
+        }
+        /**
+         * Creates a new {@link Transition} from the given parameters and appends it to the array of transitions
+         * stored for fromState; the array is created the first time a state is used. toPos is optional.
+         *
+         * @param fromState  State the transition leaves; used as the key in the transitions map.
+         * @param toState  State type input indicating the next state.
+         * @param _with     String input indicating with what the transition will be made.
+         * @param withName String input passed on to the {@link Transition} constructor.
+         * @param toPos    Optional String input passed on to the {@link Transition} constructor.
+         */
+        addTransition(fromState, toState, _with, withName, toPos) {
+            let newTransition = new Transition_1.Transition(_with, withName, toState, toPos);
+            let transitionList;
+            if (this.transitions.has(fromState)) {
+                transitionList = this.transitions.get(fromState);
+            }
+            else {
+                transitionList = new Array();
+            }
+            transitionList.push(newTransition);
+            this.transitions.set(fromState, transitionList);
+        }
+        /**
+         * The getTransitions method returns the transitions stored for the given state.
+         *
+         * @param state State input.
+         * @return the array stored for the state, or a new empty array if the state has no transitions.
+         */
+        getTransitions(state) {
+            if (this.transitions.has(state)) {
+                return this.transitions.get(state);
+            }
+            else {
+                return new Array();
+            }
+        }
+    }
+    exports.FiniteStateMachine = FiniteStateMachine;
+});
+//# sourceMappingURL=FiniteStateMachine.js.map

package/dist/MorphologicalAnalysis/FiniteStateMachine.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"FiniteStateMachine.js","sourceRoot":"","sources":["../../source/MorphologicalAnalysis/FiniteStateMachine.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,mCAA8B;IAC9B,6CAAwC;IACxC,uEAAkE;IAElE,MAAa,kBAAkB;QAK3B;;;;;;;;;;;;;;;;WAgBG;QACH,YAAY,WAAmB,kCAAkC;YApBzD,WAAM,GAAiB,IAAI,KAAK,EAAS,CAAA;YACzC,gBAAW,GAAkC,IAAI,GAAG,EAA4B,CAAA;YAoBpF,IAAI,WAAW,GAAG,IAAI,yBAAW,CAAC,QAAQ,CAAC,CAAA;YAC3C,WAAW,CAAC,KAAK,EAAE,CAAC;YACpB,IAAI,aAAa,GAAG,WAAW,CAAC,aAAa,EAAE,CAAC;YAChD,IAAI,SAAS,GAAG,aAAa,CAAC,aAAa,EAAE,CAAC;YAC9C,OAAO,SAAS,IAAI,SAAS,EAAE;gBAC3B,IAAI,SAAS,CAAC,aAAa,EAAE,EAAE;oBAC3B,IAAI,SAAS,GAAG,SAAS,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;oBACpD,IAAI,UAAU,GAAG,SAAS,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;oBACtD,IAAI,QAAQ,GAAG,SAAS,CAAC,iBAAiB,CAAC,KAAK,CAAC,CAAC;oBAClD,IAAI,KAAK,CAAA;oBACT,IAAI,UAAU,IAAI,KAAK,EAAE;wBACrB,IAAI,WAAW,GAAG,SAAS,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;wBAC7D,KAAK,GAAG,IAAI,aAAK,CAAC,SAAS,EAAE,IAAI,EAAE,QAAQ,IAAI,KAAK,EAAE,WAAW,CAAC,CAAC;qBACtE;yBAAM;wBACH,KAAK,GAAG,IAAI,aAAK,CAAC,SAAS,EAAE,KAAK,EAAE,QAAQ,IAAI,KAAK,CAAC,CAAC;qBAC1D;oBACD,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;iBAC3B;gBACD,SAAS,GAAG,SAAS,CAAC,cAAc,EAAE,CAAC;aAC1C;YACD,SAAS,GAAG,aAAa,CAAC,aAAa,EAAE,CAAC;YAC1C,OAAO,SAAS,IAAI,SAAS,EAAC;gBAC1B,IAAI,SAAS,CAAC,aAAa,EAAE,EAAC;oBAC1B,IAAI,SAAS,GAAG,SAAS,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;oBACpD,IAAI,KAAK,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;oBACrC,IAAI,cAAc,GAAG,SAAS,CAAC,aAAa,EAAE,CAAC;oBAC/C,OAAO,cAAc,IAAI,SAAS,EAAC;wBAC/B,IAAI,cAAc,CAAC,aAAa,EAAE,EAAC;4BAC/B,IAAI,WAAW,GAAG,cAAc,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;4BAC3D,IAAI,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;4BACzC,IAAI,QAAQ,GAAG,cAAc,CAAC,iBAAiB,CAAC,gBAAgB,CAAC,CAAC;4BAClE,IAAI,SAAS,GAAG,cAAc,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;4BAC1D,IAAI,QAAQ,GAAG,cAAc,CAAC,aAAa,EAAE,CAAC;4BAC9C,OAAO,QAAQ,IAAI,SAAS,EAAC;gCACzB,IAAI,KAAK,CAAA;gCACT,IAAI,QAAQ,CAAC,aAAa,EAAE,EAAC;oCACzB,QAAQ,GAAG,QAAQ,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC;oCAC9C,KAAK,GAAG,QAAQ,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;iCAC/C;
qCAAM;oCACH,KAAK,GAAG,EAAE,CAAC;iCACd;gCACD,IAAI,KAAK,IAAI,EAAE,EAAC;oCACZ,IAAI,SAAS,IAAI,EAAE,EAAC;wCAChB,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,SAAS,EAAE,EAAE,QAAQ,CAAC,CAAC;qCACtE;yCAAM;wCACH,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;qCACjF;iCACJ;qCAAM;oCACH,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,KAAK,CAAC,CAAC;iCAC7E;gCACD,QAAQ,GAAG,QAAQ,CAAC,cAAc,EAAE,CAAC;6BACxC;yBACJ;wBACD,cAAc,GAAG,cAAc,CAAC,cAAc,EAAE,CAAC;qBACpD;iBACJ;gBACD,SAAS,GAAG,SAAS,CAAC,cAAc,EAAE,CAAC;aAC1C;QACL,CAAC;QAED;;;;;;WAMG;QACH,iBAAiB,CAAC,UAAkB;YAChC,KAAK,IAAI,KAAK,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,EAAE;gBACvC,KAAK,IAAI,WAAW,IAAI,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE;oBACjD,IAAI,WAAW,CAAC,QAAQ,EAAE,IAAI,SAAS,IAAI,WAAW,CAAC,QAAQ,EAAE,IAAI,UAAU,EAAE;wBAC7E,OAAO,IAAI,CAAC;qBACf;iBACJ;aACJ;YACD,OAAO,KAAK,CAAC;QACjB,CAAC;QAED;;;WAGG;QACH,SAAS;YACL,OAAO,IAAI,CAAC,MAAM,CAAA;QACtB,CAAC;QAED;;;;;;WAMG;QACH,QAAQ,CAAC,IAAY;YACjB,KAAK,IAAI,KAAK,IAAI,IAAI,CAAC,MAAM,EAAE;gBAC3B,IAAI,KAAK,CAAC,OAAO,EAAE,IAAI,IAAI,EAAE;oBACzB,OAAO,KAAK,CAAC;iBAChB;aACJ;YACD,OAAO,SAAS,CAAC;QACrB,CAAC;QAED;;;;;;;;;WASG;QACH,aAAa,CAAC,SAAgB,EAAE,OAAc,EAAE,KAAa,EAAE,QAAgB,EAAE,KAAc;YAC3F,IAAI,aAAa,GAAG,IAAI,uBAAU,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC;YACpE,IAAI,cAAc,CAAA;YAClB,IAAI,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,EAAC;gBAChC,cAAc,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;aACpD;iBAAM;gBACH,cAAc,GAAG,IAAI,KAAK,EAAc,CAAA;aAC3C;YACD,cAAc,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YACnC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,SAAS,EAAE,cAAc,CAAC,CAAC;QACpD,CAAC;QAED;;;;;WAKG;QACH,cAAc,CAAC,KAAY;YACvB,IAAI,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,EAAC;gBAC5B,OAAO,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;aACtC;iBAAM;gBACH,OAAO,IAAI,KAAK,EAAc,CAAC;aAClC;QACL,CAAC;KACJ;IAhKD,gDAgKC"}