nlptoolkit-morphologicalanalysis 1.0.13 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/Corpus/DisambiguationCorpus.d.ts +7 -0
- package/dist/Corpus/DisambiguationCorpus.js +7 -0
- package/dist/Corpus/DisambiguationCorpus.js.map +1 -1
- package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.d.ts +71 -3
- package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js +164 -41
- package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js.map +1 -1
- package/dist/MorphologicalAnalysis/FsmParse.d.ts +9 -0
- package/dist/MorphologicalAnalysis/FsmParse.js +15 -0
- package/dist/MorphologicalAnalysis/FsmParse.js.map +1 -1
- package/dist/MorphologicalAnalysis/InflectionalGroup.js +3 -2
- package/dist/MorphologicalAnalysis/InflectionalGroup.js.map +1 -1
- package/dist/MorphologicalAnalysis/MorphologicalParse.d.ts +98 -0
- package/dist/MorphologicalAnalysis/MorphologicalParse.js +161 -10
- package/dist/MorphologicalAnalysis/MorphologicalParse.js.map +1 -1
- package/dist/MorphologicalAnalysis/MorphologicalTag.d.ts +9 -1
- package/dist/MorphologicalAnalysis/MorphologicalTag.js +8 -0
- package/dist/MorphologicalAnalysis/MorphologicalTag.js.map +1 -1
- package/dist/MorphologicalAnalysis/MorphotacticEngine.d.ts +47 -0
- package/dist/MorphologicalAnalysis/MorphotacticEngine.js +51 -1
- package/dist/MorphologicalAnalysis/MorphotacticEngine.js.map +1 -1
- package/dist/MorphologicalAnalysis/Transition.d.ts +22 -8
- package/dist/MorphologicalAnalysis/Transition.js +25 -9
- package/dist/MorphologicalAnalysis/Transition.js.map +1 -1
- package/package.json +2 -2
- package/parses/ac/314/247/304/261kla.txt +57 -3
- package/parses/ak.txt +72 -3
- package/parses/aksa.txt +40 -2
- package/parses/anla.txt +57 -3
- package/parses/azal.txt +63 -4
- package/parses/bo/314/210l.txt +53 -3
- package/parses/bul.txt +53 -3
- package/parses/cenk.txt +8 -0
- package/parses/cevapla.txt +74 -4
- package/parses/cos/314/247.txt +53 -3
- package/parses/c/314/247o/314/210k.txt +54 -3
- package/parses/c/314/247/304/261k.txt +59 -3
- package/parses/del.txt +47 -3
- package/parses/doldur.txt +47 -3
- package/parses/emlak.txt +2 -0
- package/parses/git.txt +59 -3
- package/parses/giy.txt +59 -3
- package/parses/go/314/210c/314/247.txt +59 -3
- package/parses/go/314/210ster.txt +63 -4
- package/parses/hal.txt +20 -4
- package/parses/kalp.txt +29 -4
- package/parses/kavur.txt +80 -5
- package/parses/kaydol.txt +69 -4
- package/parses/resim.txt +14 -0
- package/parses/s/304/261ska.txt +24 -0
- package/parses/ye.txt +40 -2
- package/parses/yemek.txt +6 -0
- package/parses/y/304/261ka.txt +90 -5
- package/parses/y/304/261ldo/314/210nu/314/210mu/314/210.txt +6 -0
- package/pronunciations.txt +490 -0
- package/source/Corpus/DisambiguationCorpus.ts +7 -0
- package/source/MorphologicalAnalysis/FsmMorphologicalAnalyzer.ts +171 -45
- package/source/MorphologicalAnalysis/FsmParse.ts +16 -1
- package/source/MorphologicalAnalysis/InflectionalGroup.ts +3 -2
- package/source/MorphologicalAnalysis/MorphologicalParse.ts +161 -10
- package/source/MorphologicalAnalysis/MorphologicalTag.ts +9 -1
- package/source/MorphologicalAnalysis/MorphotacticEngine.ts +51 -1
- package/source/MorphologicalAnalysis/Transition.ts +25 -9
- package/suffixes.txt +6313 -0
- package/tests/DisambiguationCorpusTest.js +14 -0
- package/tests/DisambiguationCorpusTest.js.map +1 -0
- package/tests/FiniteStateMachineTest.js +96 -0
- package/tests/FiniteStateMachineTest.js.map +1 -0
- package/tests/FiniteStateMachineTest.ts +1 -1
- package/tests/FsmMorphologicalAnalyzerTest.js +250 -0
- package/tests/FsmMorphologicalAnalyzerTest.js.map +1 -0
- package/tests/FsmMorphologicalAnalyzerTest.ts +12 -6
- package/tests/FsmParseListTest.js +100 -0
- package/tests/FsmParseListTest.js.map +1 -0
- package/tests/FsmParseTest.js +68 -0
- package/tests/FsmParseTest.js.map +1 -0
- package/tests/InflectionalGroupTest.js +86 -0
- package/tests/InflectionalGroupTest.js.map +1 -0
- package/tests/MorphologicalParseTest.js +154 -0
- package/tests/MorphologicalParseTest.js.map +1 -0
- package/tests/TransitionTest.js +184 -0
- package/tests/TransitionTest.js.map +1 -0
- package/tests/TransitionTest.ts +8 -0
- package/turkish_finite_state_machine.xml +11 -3
|
@@ -539,6 +539,12 @@ export class MorphologicalParse {
|
|
|
539
539
|
return "-XXX-";
|
|
540
540
|
}
|
|
541
541
|
|
|
542
|
+
/**
|
|
543
|
+
* Returns the pronoun type of the parse for universal dependency feature ProType.
|
|
544
|
+
* @return "Art" if the pronoun is also a determiner; "Prs" if the pronoun is personal pronoun; "Rcp" if the
|
|
545
|
+
* pronoun is 'birbiri'; "Ind" if the pronoun is an indeterminate pronoun; "Neg" if the pronoun is 'hiçbiri';
|
|
546
|
+
* "Int" if the pronoun is a question pronoun; "Dem" if the pronoun is a demonstrative pronoun.
|
|
547
|
+
*/
|
|
542
548
|
private getPronType(): string{
|
|
543
549
|
let lemma = this.root.getName();
|
|
544
550
|
if (this.containsTag(MorphologicalTag.DETERMINER)){
|
|
@@ -572,6 +578,11 @@ export class MorphologicalParse {
|
|
|
572
578
|
return undefined;
|
|
573
579
|
}
|
|
574
580
|
|
|
581
|
+
/**
|
|
582
|
+
* Returns the numeral type of the parse for universal dependency feature NumType.
|
|
583
|
+
* @return "Ord" if the parse is Time, Ordinal or the word is '%' or 'kaçıncı'; "Dist" if the word is a
|
|
584
|
+
* distributive number such as 'beşer'; "Card" if the number is cardinal or any number or the word is 'kaç'.
|
|
585
|
+
*/
|
|
575
586
|
private getNumType(): string{
|
|
576
587
|
let lemma = this.root.getName();
|
|
577
588
|
if (lemma == "%" || this.containsTag(MorphologicalTag.TIME)){
|
|
@@ -589,6 +600,10 @@ export class MorphologicalParse {
|
|
|
589
600
|
return undefined;
|
|
590
601
|
}
|
|
591
602
|
|
|
603
|
+
/**
|
|
604
|
+
* Returns the value for the dependency feature Reflex.
|
|
605
|
+
* @return "Yes" if the root word is 'kendi', undefined otherwise.
|
|
606
|
+
*/
|
|
592
607
|
private getReflex(): string{
|
|
593
608
|
let lemma = this.root.getName();
|
|
594
609
|
if (lemma == "kendi"){
|
|
@@ -597,7 +612,20 @@ export class MorphologicalParse {
|
|
|
597
612
|
return undefined;
|
|
598
613
|
}
|
|
599
614
|
|
|
615
|
+
/**
|
|
616
|
+
* Returns the agreement of the parse for the universal dependency feature Number.
|
|
617
|
+
* @return "Sing" if the agreement of the parse is singular (contains A1SG, A2SG, A3SG); "Plur" if the agreement
|
|
618
|
+
* of the parse is plural (contains A1PL, A2PL, A3PL).
|
|
619
|
+
*/
|
|
600
620
|
private getNumber(): string{
|
|
621
|
+
if (this.lastIGContainsTag(MorphologicalTag.A1SG) || this.lastIGContainsTag(MorphologicalTag.A2SG) ||
|
|
622
|
+
this.lastIGContainsTag(MorphologicalTag.A3SG)){
|
|
623
|
+
return "Sing";
|
|
624
|
+
}
|
|
625
|
+
if (this.lastIGContainsTag(MorphologicalTag.A1PL) || this.lastIGContainsTag(MorphologicalTag.A2PL) ||
|
|
626
|
+
this.lastIGContainsTag(MorphologicalTag.A3PL)){
|
|
627
|
+
return "Plur";
|
|
628
|
+
}
|
|
601
629
|
if (this.containsTag(MorphologicalTag.A1SG) || this.containsTag(MorphologicalTag.A2SG) ||
|
|
602
630
|
this.containsTag(MorphologicalTag.A3SG)){
|
|
603
631
|
return "Sing";
|
|
@@ -609,7 +637,20 @@ export class MorphologicalParse {
|
|
|
609
637
|
return undefined;
|
|
610
638
|
}
|
|
611
639
|
|
|
640
|
+
/**
|
|
641
|
+
* Returns the possessive agreement of the parse for the universal dependency feature Number[psor].
|
|
642
|
+
* @return "Sing" if the possessive agreement of the parse is singular (contains P1SG, P2SG, P3SG); "Plur" if the
|
|
643
|
+
* possessive agreement of the parse is plural (contains P1PL, P2PL, P3PL).
|
|
644
|
+
*/
|
|
612
645
|
private getPossessiveNumber(): string{
|
|
646
|
+
if (this.lastIGContainsTag(MorphologicalTag.P1SG) || this.lastIGContainsTag(MorphologicalTag.P2SG) ||
|
|
647
|
+
this.lastIGContainsTag(MorphologicalTag.P3SG)){
|
|
648
|
+
return "Sing";
|
|
649
|
+
}
|
|
650
|
+
if (this.lastIGContainsTag(MorphologicalTag.P1PL) || this.lastIGContainsTag(MorphologicalTag.P2PL) ||
|
|
651
|
+
this.lastIGContainsTag(MorphologicalTag.P3PL)){
|
|
652
|
+
return "Plur";
|
|
653
|
+
}
|
|
613
654
|
if (this.containsTag(MorphologicalTag.P1SG) || this.containsTag(MorphologicalTag.P2SG) ||
|
|
614
655
|
this.containsTag(MorphologicalTag.P3SG)){
|
|
615
656
|
return "Sing";
|
|
@@ -621,6 +662,11 @@ export class MorphologicalParse {
|
|
|
621
662
|
return undefined;
|
|
622
663
|
}
|
|
623
664
|
|
|
665
|
+
/**
|
|
666
|
+
* Returns the case marking of the parse for the universal dependency feature case.
|
|
667
|
+
* @return "Acc" for accusative marker; "Dat" for dative marker; "Gen" for genitive marker; "Loc" for locative
|
|
668
|
+
* marker; "Ins" for instrumentative marker; "Abl" for ablative marker; "Nom" for nominative marker.
|
|
669
|
+
*/
|
|
624
670
|
private getCase(): string{
|
|
625
671
|
if (this.containsTag(MorphologicalTag.ACCUSATIVE) || this.containsTag(MorphologicalTag.PCACCUSATIVE)){
|
|
626
672
|
return "Acc";
|
|
@@ -646,6 +692,11 @@ export class MorphologicalParse {
|
|
|
646
692
|
return undefined;
|
|
647
693
|
}
|
|
648
694
|
|
|
695
|
+
/**
|
|
696
|
+
* Returns the definiteness of the parse for the universal dependency feature definite. It applies only for
|
|
697
|
+
* determiners in Turkish.
|
|
698
|
+
* @return "Ind" for 'bir', 'bazı', or 'birkaç'. "Def" for 'her', 'bu', 'şu', 'o', 'bütün'.
|
|
699
|
+
*/
|
|
649
700
|
private getDefinite(): string{
|
|
650
701
|
let lemma = this.root.getName();
|
|
651
702
|
if (this.containsTag(MorphologicalTag.DETERMINER)){
|
|
@@ -659,6 +710,10 @@ export class MorphologicalParse {
|
|
|
659
710
|
return undefined;
|
|
660
711
|
}
|
|
661
712
|
|
|
713
|
+
/**
|
|
714
|
+
* Returns the degree of the parse for the universal dependency feature degree.
|
|
715
|
+
* @return "Cmp" for comparative adverb 'daha'; "Sup" for superlative adjective or adverb 'en'.
|
|
716
|
+
*/
|
|
662
717
|
private getDegree(): string{
|
|
663
718
|
let lemma = this.root.getName();
|
|
664
719
|
if (lemma == "daha"){
|
|
@@ -670,7 +725,14 @@ export class MorphologicalParse {
|
|
|
670
725
|
return undefined;
|
|
671
726
|
}
|
|
672
727
|
|
|
728
|
+
/**
|
|
729
|
+
* Returns the polarity of the verb for the universal dependency feature polarity.
|
|
730
|
+
* @return "Pos" for positive polarity containing tag POS; "Neg" for negative polarity containing tag NEG.
|
|
731
|
+
*/
|
|
673
732
|
private getPolarity(): string{
|
|
733
|
+
if (this.root.getName() == "değil"){
|
|
734
|
+
return "Neg";
|
|
735
|
+
}
|
|
674
736
|
if (this.containsTag(MorphologicalTag.POSITIVE)){
|
|
675
737
|
return "Pos";
|
|
676
738
|
}
|
|
@@ -680,7 +742,20 @@ export class MorphologicalParse {
|
|
|
680
742
|
return undefined;
|
|
681
743
|
}
|
|
682
744
|
|
|
745
|
+
/**
|
|
746
|
+
* Returns the person of the agreement of the parse for the universal dependency feature person.
|
|
747
|
+
* @return "1" for first person; "2" for second person; "3" for third person.
|
|
748
|
+
*/
|
|
683
749
|
private getPerson(): string{
|
|
750
|
+
if (this.lastIGContainsTag(MorphologicalTag.A1SG) || this.lastIGContainsTag(MorphologicalTag.A1PL)){
|
|
751
|
+
return "1";
|
|
752
|
+
}
|
|
753
|
+
if (this.lastIGContainsTag(MorphologicalTag.A2SG) || this.lastIGContainsTag(MorphologicalTag.A2PL)){
|
|
754
|
+
return "2";
|
|
755
|
+
}
|
|
756
|
+
if (this.lastIGContainsTag(MorphologicalTag.A3SG) || this.lastIGContainsTag(MorphologicalTag.A3PL)){
|
|
757
|
+
return "3";
|
|
758
|
+
}
|
|
684
759
|
if (this.containsTag(MorphologicalTag.A1SG) || this.containsTag(MorphologicalTag.A1PL)){
|
|
685
760
|
return "1";
|
|
686
761
|
}
|
|
@@ -693,7 +768,20 @@ export class MorphologicalParse {
|
|
|
693
768
|
return undefined;
|
|
694
769
|
}
|
|
695
770
|
|
|
771
|
+
/**
|
|
772
|
+
* Returns the person of the possessive agreement of the parse for the universal dependency feature Person[psor].
|
|
773
|
+
* @return "1" for first person; "2" for second person; "3" for third person.
|
|
774
|
+
*/
|
|
696
775
|
private getPossessivePerson(): string{
|
|
776
|
+
if (this.lastIGContainsTag(MorphologicalTag.P1SG) || this.lastIGContainsTag(MorphologicalTag.P1PL)){
|
|
777
|
+
return "1";
|
|
778
|
+
}
|
|
779
|
+
if (this.lastIGContainsTag(MorphologicalTag.P2SG) || this.lastIGContainsTag(MorphologicalTag.P2PL)){
|
|
780
|
+
return "2";
|
|
781
|
+
}
|
|
782
|
+
if (this.lastIGContainsTag(MorphologicalTag.P3SG) || this.lastIGContainsTag(MorphologicalTag.P3PL)){
|
|
783
|
+
return "3";
|
|
784
|
+
}
|
|
697
785
|
if (this.containsTag(MorphologicalTag.P1SG) || this.containsTag(MorphologicalTag.P1PL)){
|
|
698
786
|
return "1";
|
|
699
787
|
}
|
|
@@ -706,6 +794,12 @@ export class MorphologicalParse {
|
|
|
706
794
|
return undefined;
|
|
707
795
|
}
|
|
708
796
|
|
|
797
|
+
/**
|
|
798
|
+
* Returns the voice of the verb parse for the universal dependency feature voice.
|
|
799
|
+
* @return "CauPass" if the verb parse is both causative and passive; "Pass" if the verb parse is only passive;
|
|
800
|
+
* "Rcp" if the verb parse is reciprocal; "Cau" if the verb parse is only causative; "Rfl" if the verb parse is
|
|
801
|
+
* reflexive.
|
|
802
|
+
*/
|
|
709
803
|
private getVoice(): string{
|
|
710
804
|
if (this.containsTag(MorphologicalTag.CAUSATIVE) && this.containsTag(MorphologicalTag.PASSIVE)){
|
|
711
805
|
return "CauPass";
|
|
@@ -725,6 +819,11 @@ export class MorphologicalParse {
|
|
|
725
819
|
return undefined;
|
|
726
820
|
}
|
|
727
821
|
|
|
822
|
+
/**
|
|
823
|
+
* Returns the aspect of the verb parse for the universal dependency feature aspect.
|
|
824
|
+
* @return "Perf" for past, narrative and future tenses; "Prog" for progressive tenses; "Hab" for Aorist; "Rapid"
|
|
825
|
+
* for parses containing HASTILY tag; "Dur" for parses containing START, STAY or REPEAT tags.
|
|
826
|
+
*/
|
|
728
827
|
private getAspect(): string{
|
|
729
828
|
if (this.containsTag(MorphologicalTag.PASTTENSE) || this.containsTag(MorphologicalTag.NARRATIVE) ||
|
|
730
829
|
this.containsTag(MorphologicalTag.FUTURE)){
|
|
@@ -746,22 +845,40 @@ export class MorphologicalParse {
|
|
|
746
845
|
return undefined;
|
|
747
846
|
}
|
|
748
847
|
|
|
848
|
+
/**
|
|
849
|
+
* Returns the tense of the verb parse for universal dependency feature tense.
|
|
850
|
+
* @return "Pqp" if the parse has both narrative and past tense tags; "Past" for simple past or narrative tense; "Fut" for future tense; "Pres" for other
|
|
851
|
+
* tenses.
|
|
852
|
+
*/
|
|
749
853
|
private getTense(): string{
|
|
750
|
-
if (this.containsTag(MorphologicalTag.PASTTENSE)){
|
|
854
|
+
if (this.containsTag(MorphologicalTag.NARRATIVE) && this.containsTag(MorphologicalTag.PASTTENSE)){
|
|
855
|
+
return "Pqp";
|
|
856
|
+
}
|
|
857
|
+
if (this.containsTag(MorphologicalTag.NARRATIVE) || this.containsTag(MorphologicalTag.PASTTENSE)){
|
|
751
858
|
return "Past";
|
|
752
859
|
}
|
|
753
860
|
if (this.containsTag(MorphologicalTag.FUTURE)){
|
|
754
861
|
return "Fut";
|
|
755
862
|
}
|
|
756
|
-
if (this.containsTag(MorphologicalTag.NARRATIVE) && this.containsTag(MorphologicalTag.PASTTENSE)){
|
|
757
|
-
return "Pqp";
|
|
758
|
-
}
|
|
759
863
|
if (!this.containsTag(MorphologicalTag.PASTTENSE) && !this.containsTag(MorphologicalTag.FUTURE)){
|
|
760
864
|
return "Pres";
|
|
761
865
|
}
|
|
762
866
|
return undefined;
|
|
763
867
|
}
|
|
764
868
|
|
|
869
|
+
/**
|
|
870
|
+
* Returns the modality of the verb parse for the universal dependency feature mood.
|
|
871
|
+
* @return "GenNecPot" if both necessitative and potential is combined with a suffix of general modality;
|
|
872
|
+
* "CndGenPot" if both conditional and potential is combined with a suffix of general modality;
|
|
873
|
+
* "GenNec" if necessitative is combined with a suffix of general modality;
|
|
874
|
+
* "GenPot" if potential is combined with a suffix of general modality;
|
|
875
|
+
* "NecPot" if necessitative is combined with potential;
|
|
876
|
+
* "DesPot" if desiderative is combined with potential;
|
|
877
|
+
* "CndPot" if conditional is combined with potential;
|
|
878
|
+
* "CndGen" if conditional is combined with a suffix of general modality;
|
|
879
|
+
* "Imp" for imperative; "Cnd" for simple conditional; "Des" for simple desiderative; "Opt" for optative; "Nec" for
|
|
880
|
+
* simple necessitative; "Pot" for simple potential; "Gen" for simple suffix of a general modality.
|
|
881
|
+
*/
|
|
765
882
|
private getMood(): string{
|
|
766
883
|
if ((this.containsTag(MorphologicalTag.COPULA) || this.containsTag(MorphologicalTag.AORIST)) &&
|
|
767
884
|
this.containsTag(MorphologicalTag.NECESSITY) && this.containsTag(MorphologicalTag.ABLE)){
|
|
@@ -824,6 +941,11 @@ export class MorphologicalParse {
|
|
|
824
941
|
return undefined;
|
|
825
942
|
}
|
|
826
943
|
|
|
944
|
+
/**
|
|
945
|
+
* Returns the form of the verb parse for the universal dependency feature verbForm.
|
|
946
|
+
* @return "Part" for participles; "Vnoun" for infinitives; "Conv" for parses containing tags SINCEDOINGSO,
|
|
947
|
+
* WITHOUTHAVINGDONESO, WITHOUTBEINGABLETOHAVEDONESO, BYDOINGSO, AFTERDOINGSO, INFINITIVE3; "Fin" for others.
|
|
948
|
+
*/
|
|
827
949
|
private getVerbForm(): string{
|
|
828
950
|
if (this.containsTag(MorphologicalTag.PASTPARTICIPLE) || this.containsTag(MorphologicalTag.FUTUREPARTICIPLE) ||
|
|
829
951
|
this.containsTag(MorphologicalTag.PRESENTPARTICIPLE)){
|
|
@@ -850,6 +972,25 @@ export class MorphologicalParse {
|
|
|
850
972
|
return undefined;
|
|
851
973
|
}
|
|
852
974
|
|
|
975
|
+
private getEvident(): string{
|
|
976
|
+
if (this.containsTag(MorphologicalTag.NARRATIVE)){
|
|
977
|
+
return "Nfh";
|
|
978
|
+
} else {
|
|
979
|
+
if (this.containsTag(MorphologicalTag.COPULA) || this.containsTag(MorphologicalTag.ABLE) || this.containsTag(MorphologicalTag.AORIST) || this.containsTag(MorphologicalTag.PROGRESSIVE2)
|
|
980
|
+
|| this.containsTag(MorphologicalTag.DESIRE) || this.containsTag(MorphologicalTag.NECESSITY) || this.containsTag(MorphologicalTag.CONDITIONAL) || this.containsTag(MorphologicalTag.IMPERATIVE) || this.containsTag(MorphologicalTag.OPTATIVE)
|
|
981
|
+
|| this.containsTag(MorphologicalTag.PASTTENSE) || this.containsTag(MorphologicalTag.NARRATIVE) || this.containsTag(MorphologicalTag.PROGRESSIVE1) || this.containsTag(MorphologicalTag.FUTURE)) {
|
|
982
|
+
return "Fh";
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
return undefined;
|
|
986
|
+
}
|
|
987
|
+
|
|
988
|
+
/**
|
|
989
|
+
* Construct the universal dependency features as an array of strings. Each element represents a single feature.
|
|
990
|
+
* Every feature is given as featureType = featureValue.
|
|
991
|
+
* @param uPos Universal dependency part of speech tag for the parse.
|
|
992
|
+
* @return An array of universal dependency features for this parse.
|
|
993
|
+
*/
|
|
853
994
|
getUniversalDependencyFeatures(uPos: string): Array<string>{
|
|
854
995
|
let featureList = new Array<string>();
|
|
855
996
|
let pronType = this.getPronType();
|
|
@@ -868,7 +1009,7 @@ export class MorphologicalParse {
|
|
|
868
1009
|
if (degree != undefined && uPos.toUpperCase() != "ADJ"){
|
|
869
1010
|
featureList.push("Degree=" + degree);
|
|
870
1011
|
}
|
|
871
|
-
if (this.isNoun() || this.isVerb()){
|
|
1012
|
+
if (this.isNoun() || this.isVerb() || this.root.getName() == "mi" || (pronType != undefined && pronType != "Art")){
|
|
872
1013
|
let number = this.getNumber();
|
|
873
1014
|
if (number != undefined){
|
|
874
1015
|
featureList.push("Number=" + number);
|
|
@@ -886,7 +1027,7 @@ export class MorphologicalParse {
|
|
|
886
1027
|
featureList.push("Person[psor]=" + possessivePerson);
|
|
887
1028
|
}
|
|
888
1029
|
}
|
|
889
|
-
if (this.isNoun()) {
|
|
1030
|
+
if (this.isNoun() || (pronType != undefined && pronType != "Art")) {
|
|
890
1031
|
let case_ = this.getCase();
|
|
891
1032
|
if (case_ != undefined){
|
|
892
1033
|
featureList.push("Case=" + case_);
|
|
@@ -898,17 +1039,17 @@ export class MorphologicalParse {
|
|
|
898
1039
|
featureList.push("Definite=" + definite);
|
|
899
1040
|
}
|
|
900
1041
|
}
|
|
901
|
-
if (this.isVerb()){
|
|
1042
|
+
if (this.isVerb() || this.root.getName() == "mi"){
|
|
902
1043
|
let polarity = this.getPolarity();
|
|
903
1044
|
if (polarity != undefined){
|
|
904
1045
|
featureList.push("Polarity=" + polarity);
|
|
905
1046
|
}
|
|
906
1047
|
let voice = this.getVoice();
|
|
907
|
-
if (voice != undefined){
|
|
1048
|
+
if (voice != undefined && this.root.getName() != "mi"){
|
|
908
1049
|
featureList.push("Voice=" + voice);
|
|
909
1050
|
}
|
|
910
1051
|
let aspect = this.getAspect();
|
|
911
|
-
if (aspect != undefined && uPos.toUpperCase() != "PROPN"){
|
|
1052
|
+
if (aspect != undefined && uPos.toUpperCase() != "PROPN" && this.root.getName() != "mi"){
|
|
912
1053
|
featureList.push("Aspect=" + aspect);
|
|
913
1054
|
}
|
|
914
1055
|
let tense = this.getTense();
|
|
@@ -916,18 +1057,28 @@ export class MorphologicalParse {
|
|
|
916
1057
|
featureList.push("Tense=" + tense);
|
|
917
1058
|
}
|
|
918
1059
|
let mood = this.getMood();
|
|
919
|
-
if (mood != undefined && uPos.toUpperCase() != "PROPN"){
|
|
1060
|
+
if (mood != undefined && uPos.toUpperCase() != "PROPN" && this.root.getName() != "mi"){
|
|
920
1061
|
featureList.push("Mood=" + mood);
|
|
921
1062
|
}
|
|
922
1063
|
let verbForm = this.getVerbForm();
|
|
923
1064
|
if (verbForm != undefined && uPos.toUpperCase() != "PROPN"){
|
|
924
1065
|
featureList.push("VerbForm=" + verbForm);
|
|
925
1066
|
}
|
|
1067
|
+
let evident = this.getEvident();
|
|
1068
|
+
if (evident != undefined && this.root.getName() != "mi"){
|
|
1069
|
+
featureList.push("Evident=" + evident);
|
|
1070
|
+
}
|
|
926
1071
|
}
|
|
927
1072
|
featureList.sort();
|
|
928
1073
|
return featureList;
|
|
929
1074
|
}
|
|
930
1075
|
|
|
1076
|
+
/**
|
|
1077
|
+
* Returns the universal dependency part of speech for this parse.
|
|
1078
|
+
* @return "AUX" for word 'değil'; "PROPN" for proper nouns; "NOUN" for nouns; "ADJ" for adjectives; "ADV" for
|
|
1079
|
+
* adverbs; "INTJ" for interjections; "VERB" for verbs; "PUNCT" for punctuation symbols; "DET" for determiners;
|
|
1080
|
+
* "NUM" for numerals; "PRON" for pronouns; "ADP" for postpositions; "SCONJ" or "CCONJ" for conjunctions.
|
|
1081
|
+
*/
|
|
931
1082
|
getUniversalDependencyPos(): string{
|
|
932
1083
|
let lemma = this.root.getName();
|
|
933
1084
|
if (lemma == "değil"){
|
|
@@ -4,6 +4,18 @@ import {TurkishLanguage} from "nlptoolkit-dictionary/dist/Language/TurkishLangua
|
|
|
4
4
|
|
|
5
5
|
export class MorphotacticEngine {
|
|
6
6
|
|
|
7
|
+
/**
|
|
8
|
+
* resolveD resolves the D metamorpheme to 'd' or 't' depending on the root and current formationToCheck. It adds
|
|
9
|
+
* 'd' if the root is an abbreviation; 't' if the last phoneme is one of the "çfhkpsşt" (fıstıkçı şahap) or 'd'
|
|
10
|
+
* otherwise; 't' if the word is a number ending with 3, 4, 5, 40, 60, or 70 or 'd' otherwise.
|
|
11
|
+
* @param root Root of the word
|
|
12
|
+
* @param formation Formation is current status of the wordform in the current state of the finite state machine. It
|
|
13
|
+
* is always equal to formationToCheck except the case where there is an apostrophe after the
|
|
14
|
+
* formationToCheck such as (3').
|
|
15
|
+
* @param formationToCheck FormationToCheck is current status of the wordform in the current state of the finite
|
|
16
|
+
* state machine except the apostrophe at the end if it exists.
|
|
17
|
+
* @return Formation with added 'd' or 't' character.
|
|
18
|
+
*/
|
|
7
19
|
static resolveD(root: TxtWord, formation: string, formationToCheck: string): string{
|
|
8
20
|
if (root.isAbbreviation()) {
|
|
9
21
|
return formation + 'd';
|
|
@@ -36,6 +48,21 @@ export class MorphotacticEngine {
|
|
|
36
48
|
}
|
|
37
49
|
}
|
|
38
50
|
|
|
51
|
+
/**
|
|
52
|
+
* resolveA resolves the A metamorpheme to 'a' or 'e' depending on the root and current formationToCheck. It adds
|
|
53
|
+
* 'e' if the root is an abbreviation; 'a' if the last vowel is a back vowel (except words that do not obey vowel
|
|
54
|
+
* harmony during agglutination); 'e' if the last vowel is a front vowel (except words that do not obey vowel
|
|
55
|
+
* harmony during agglutination); 'a' if the word is a number ending with 6, 9, 10, 30, 40, 60, or 90 or 'e'
|
|
56
|
+
* otherwise.
|
|
57
|
+
* @param root Root of the word
|
|
58
|
+
* @param formation Formation is current status of the wordform in the current state of the finite state machine. It
|
|
59
|
+
* is always equal to formationToCheck except the case where there is an apostrophe after the
|
|
60
|
+
* formationToCheck such as (3').
|
|
61
|
+
* @param rootWord True if the current word form is root form, false otherwise.
|
|
62
|
+
* @param formationToCheck FormationToCheck is current status of the wordform in the current state of the finite
|
|
63
|
+
* state machine except the apostrophe at the end if it exists.
|
|
64
|
+
* @return Formation with added 'a' or 'e' character.
|
|
65
|
+
*/
|
|
39
66
|
static resolveA(root: TxtWord, formation: string, rootWord: boolean, formationToCheck: string): string{
|
|
40
67
|
if (root.isAbbreviation()) {
|
|
41
68
|
return formation + 'e';
|
|
@@ -89,6 +116,26 @@ export class MorphotacticEngine {
|
|
|
89
116
|
return formation;
|
|
90
117
|
}
|
|
91
118
|
|
|
119
|
+
/**
|
|
120
|
+
* resolveH resolves the H metamorpheme to 'ı', 'i', 'u' or 'ü', depending on the current formationToCheck, root,
|
|
121
|
+
* and formation. It adds 'i' if the root is an abbreviation; 'ü' if the character before the last vowel is
|
|
122
|
+
* front rounded (or back rounded when the root word does not obey vowel harmony during agglutination); 'i' if the
|
|
123
|
+
* character before the last vowel is front unrounded; 'u' if the character before the last vowel is back rounded;
|
|
124
|
+
* 'ı' if the character before the last vowel is back unrounded (or front unrounded when the root word does not obey
|
|
125
|
+
* vowel harmony during agglutination); 'ı' if the word is a number ending with 6, 40, 60 or 90; 'ü' if the word
|
|
126
|
+
* is a number ending with 3, 4, or 00; 'u' if the word is a number ending with 9, 10, or 30; 'i' otherwise for
|
|
127
|
+
* numbers. Special case for 'Hyor' suffix is handled with resolveHforSpecialCaseTenseSuffix method.
|
|
128
|
+
* @param root Root of the word
|
|
129
|
+
* @param formation Formation is current status of the wordform in the current state of the finite state machine. It
|
|
130
|
+
* is always equal to formationToCheck except the case where there is an apostrophe after the
|
|
131
|
+
* formationToCheck such as (3').
|
|
132
|
+
* @param beginningOfSuffix True if H appears in the beginning of the suffix, false otherwise.
|
|
133
|
+
* @param specialCaseTenseSuffix True if the suffix is 'Hyor', false otherwise.
|
|
134
|
+
* @param rootWord True if the current word form is root form, false otherwise.
|
|
135
|
+
* @param formationToCheck FormationToCheck is current status of the word form in the current state of the finite
|
|
136
|
+
* state machine except the apostrophe at the end if it exists.
|
|
137
|
+
* @return Formation with possibly last character dropped and 'ı', 'i', 'u' or 'ü' character added.
|
|
138
|
+
*/
|
|
92
139
|
static resolveH(root: TxtWord, formation: string, beginningOfSuffix: boolean,
|
|
93
140
|
specialCaseTenseSuffix: boolean, rootWord: boolean, formationToCheck: string): string{
|
|
94
141
|
if (root.isAbbreviation())
|
|
@@ -138,7 +185,7 @@ export class MorphotacticEngine {
|
|
|
138
185
|
(TurkishLanguage.isBackRoundedVowel(Word.lastVowel(formationToCheck)) && root.notObeysVowelHarmonyDuringAgglutination())) {
|
|
139
186
|
return formation + 'ü';
|
|
140
187
|
}
|
|
141
|
-
if ((TurkishLanguage.isFrontUnroundedVowel(Word.lastVowel(formationToCheck)) && !root.notObeysVowelHarmonyDuringAgglutination()) ||
|
|
188
|
+
if ((TurkishLanguage.isFrontUnroundedVowel(Word.lastVowel(formationToCheck)) && (!root.notObeysVowelHarmonyDuringAgglutination() || !rootWord)) ||
|
|
142
189
|
((Word.lastVowel(formationToCheck) == 'a' || Word.lastVowel(formationToCheck) == 'â') && root.notObeysVowelHarmonyDuringAgglutination())) {
|
|
143
190
|
return formation + 'i';
|
|
144
191
|
}
|
|
@@ -168,6 +215,9 @@ export class MorphotacticEngine {
|
|
|
168
215
|
}
|
|
169
216
|
}
|
|
170
217
|
}
|
|
218
|
+
if (Word.lastVowel(formationToCheck) == '0'){
|
|
219
|
+
return formation + 'i';
|
|
220
|
+
}
|
|
171
221
|
return formation;
|
|
172
222
|
}
|
|
173
223
|
|
|
@@ -122,6 +122,13 @@ export class Transition {
|
|
|
122
122
|
return true;
|
|
123
123
|
}
|
|
124
124
|
|
|
125
|
+
/**
|
|
126
|
+
* The transitionPossibleFromRoot method takes the root word and the source state as inputs. It then checks some special cases.
|
|
127
|
+
*
|
|
128
|
+
* @param root Current root word
|
|
129
|
+
* @param fromState From which state we arrived to this state.
|
|
130
|
+
* @return true if transition is possible false otherwise
|
|
131
|
+
*/
|
|
125
132
|
transitionPossibleFromRoot(root: TxtWord, fromState: State){
|
|
126
133
|
if (root.isAdjective() && ((root.isNominal() && !root.isExceptional()) || root.isPronoun()) && this._toState.getName() == "NominalRoot(ADJ)" && this._with == "0") {
|
|
127
134
|
return false;
|
|
@@ -225,14 +232,21 @@ export class Transition {
|
|
|
225
232
|
}
|
|
226
233
|
|
|
227
234
|
/**
|
|
228
|
-
* The
|
|
229
|
-
*
|
|
230
|
-
*
|
|
231
|
-
*
|
|
232
|
-
*
|
|
233
|
-
*
|
|
234
|
-
*
|
|
235
|
-
*
|
|
235
|
+
* The method is main driving method to accomplish the current transition from one state to another depending on
|
|
236
|
+
* the root form of the word, current value of the word form, and the type of the start state. The method
|
|
237
|
+
* (a) returns the original word form if the transition is an epsilon transition, (b) adds 'nunla' if the current
|
|
238
|
+
* stem is 'bu', 'şu' or 'o', (c) returns 'bana' or 'sana' if the current stem is 'ben' or 'sen' respectively.
|
|
239
|
+
* For other cases, the method first modifies current stem and then adds the transition using special metamorpheme
|
|
240
|
+
* resolving methods. These cases are: (d) Converts 'y' of the first character of the transition to 'i' if the
|
|
241
|
+
* current stem is 'ye' or 'de'. (e) Drops the last two characters and adds last character when the transition is
|
|
242
|
+
* ('Hl' or 'Hn') and last 'I' drops during passive suffixation. (f) Adds 'y' character when the word ends with 'su'
|
|
243
|
+
* and the transition does not start with 'y'. (g) Adds the last character again when the root duplicates during
|
|
244
|
+
* suffixation. (h) Drops the last two characters and adds the last character when last 'i' drops during
|
|
245
|
+
* suffixation. (i) Replaces the last character with a soft one when the root softens during suffixation.
|
|
246
|
+
* @param root Root of the current word form
|
|
247
|
+
* @param stem Current word form
|
|
248
|
+
* @param startState The state from which this Fsm morphological analysis search has started.
|
|
249
|
+
* @return The current value of the word form after this transition is completed in the finite state machine.
|
|
236
250
|
*/
|
|
237
251
|
makeTransition(root: TxtWord, stem: string, startState?: State): string{
|
|
238
252
|
if (startState == undefined){
|
|
@@ -276,8 +290,9 @@ export class Transition {
|
|
|
276
290
|
} else {
|
|
277
291
|
//---showsSuRegularities---
|
|
278
292
|
//karasu->karasuyu, su->suyu, ağırsu->ağırsuyu, akarsu->akarsuyu, bengisu->bengisuyu
|
|
279
|
-
if (rootWord && root.showsSuRegularities() && this.startWithVowelorConsonantDrops()
|
|
293
|
+
if (rootWord && root.showsSuRegularities() && this.startWithVowelorConsonantDrops()) {
|
|
280
294
|
formation = stem + 'y';
|
|
295
|
+
i = 1;
|
|
281
296
|
formationToCheck = formation;
|
|
282
297
|
} else {
|
|
283
298
|
if (rootWord && root.duplicatesDuringSuffixation() && !startState.getName().startsWith("VerbalRoot") &&
|
|
@@ -419,6 +434,7 @@ export class Transition {
|
|
|
419
434
|
} else {
|
|
420
435
|
formation = MorphotacticEngine.resolveH(root, formation, i == 1, false, rootWord, formationToCheck);
|
|
421
436
|
}
|
|
437
|
+
rootWord = false
|
|
422
438
|
break;
|
|
423
439
|
case 'C':
|
|
424
440
|
formation = MorphotacticEngine.resolveC(formation, formationToCheck);
|