nlptoolkit-morphologicalanalysis 1.0.13 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/Corpus/DisambiguationCorpus.d.ts +7 -0
- package/dist/Corpus/DisambiguationCorpus.js +7 -0
- package/dist/Corpus/DisambiguationCorpus.js.map +1 -1
- package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.d.ts +71 -3
- package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js +164 -41
- package/dist/MorphologicalAnalysis/FsmMorphologicalAnalyzer.js.map +1 -1
- package/dist/MorphologicalAnalysis/FsmParse.d.ts +9 -0
- package/dist/MorphologicalAnalysis/FsmParse.js +15 -0
- package/dist/MorphologicalAnalysis/FsmParse.js.map +1 -1
- package/dist/MorphologicalAnalysis/InflectionalGroup.js +3 -2
- package/dist/MorphologicalAnalysis/InflectionalGroup.js.map +1 -1
- package/dist/MorphologicalAnalysis/MorphologicalParse.d.ts +98 -0
- package/dist/MorphologicalAnalysis/MorphologicalParse.js +161 -10
- package/dist/MorphologicalAnalysis/MorphologicalParse.js.map +1 -1
- package/dist/MorphologicalAnalysis/MorphologicalTag.d.ts +9 -1
- package/dist/MorphologicalAnalysis/MorphologicalTag.js +8 -0
- package/dist/MorphologicalAnalysis/MorphologicalTag.js.map +1 -1
- package/dist/MorphologicalAnalysis/MorphotacticEngine.d.ts +47 -0
- package/dist/MorphologicalAnalysis/MorphotacticEngine.js +51 -1
- package/dist/MorphologicalAnalysis/MorphotacticEngine.js.map +1 -1
- package/dist/MorphologicalAnalysis/Transition.d.ts +22 -8
- package/dist/MorphologicalAnalysis/Transition.js +25 -9
- package/dist/MorphologicalAnalysis/Transition.js.map +1 -1
- package/package.json +2 -2
- package/parses/ac/314/247/304/261kla.txt +57 -3
- package/parses/ak.txt +72 -3
- package/parses/aksa.txt +40 -2
- package/parses/anla.txt +57 -3
- package/parses/azal.txt +63 -4
- package/parses/bo/314/210l.txt +53 -3
- package/parses/bul.txt +53 -3
- package/parses/cenk.txt +8 -0
- package/parses/cevapla.txt +74 -4
- package/parses/cos/314/247.txt +53 -3
- package/parses/c/314/247o/314/210k.txt +54 -3
- package/parses/c/314/247/304/261k.txt +59 -3
- package/parses/del.txt +47 -3
- package/parses/doldur.txt +47 -3
- package/parses/emlak.txt +2 -0
- package/parses/git.txt +59 -3
- package/parses/giy.txt +59 -3
- package/parses/go/314/210c/314/247.txt +59 -3
- package/parses/go/314/210ster.txt +63 -4
- package/parses/hal.txt +20 -4
- package/parses/kalp.txt +29 -4
- package/parses/kavur.txt +80 -5
- package/parses/kaydol.txt +69 -4
- package/parses/resim.txt +14 -0
- package/parses/s/304/261ska.txt +24 -0
- package/parses/ye.txt +40 -2
- package/parses/yemek.txt +6 -0
- package/parses/y/304/261ka.txt +90 -5
- package/parses/y/304/261ldo/314/210nu/314/210mu/314/210.txt +6 -0
- package/pronunciations.txt +490 -0
- package/source/Corpus/DisambiguationCorpus.ts +7 -0
- package/source/MorphologicalAnalysis/FsmMorphologicalAnalyzer.ts +171 -45
- package/source/MorphologicalAnalysis/FsmParse.ts +16 -1
- package/source/MorphologicalAnalysis/InflectionalGroup.ts +3 -2
- package/source/MorphologicalAnalysis/MorphologicalParse.ts +161 -10
- package/source/MorphologicalAnalysis/MorphologicalTag.ts +9 -1
- package/source/MorphologicalAnalysis/MorphotacticEngine.ts +51 -1
- package/source/MorphologicalAnalysis/Transition.ts +25 -9
- package/suffixes.txt +6313 -0
- package/tests/DisambiguationCorpusTest.js +14 -0
- package/tests/DisambiguationCorpusTest.js.map +1 -0
- package/tests/FiniteStateMachineTest.js +96 -0
- package/tests/FiniteStateMachineTest.js.map +1 -0
- package/tests/FiniteStateMachineTest.ts +1 -1
- package/tests/FsmMorphologicalAnalyzerTest.js +250 -0
- package/tests/FsmMorphologicalAnalyzerTest.js.map +1 -0
- package/tests/FsmMorphologicalAnalyzerTest.ts +12 -6
- package/tests/FsmParseListTest.js +100 -0
- package/tests/FsmParseListTest.js.map +1 -0
- package/tests/FsmParseTest.js +68 -0
- package/tests/FsmParseTest.js.map +1 -0
- package/tests/InflectionalGroupTest.js +86 -0
- package/tests/InflectionalGroupTest.js.map +1 -0
- package/tests/MorphologicalParseTest.js +154 -0
- package/tests/MorphologicalParseTest.js.map +1 -0
- package/tests/TransitionTest.js +184 -0
- package/tests/TransitionTest.js.map +1 -0
- package/tests/TransitionTest.ts +8 -0
- package/turkish_finite_state_machine.xml +11 -3
package/README.md
CHANGED
|
@@ -32,7 +32,7 @@ For Developers
|
|
|
32
32
|
============
|
|
33
33
|
|
|
34
34
|
You can also see [Python](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Py),
|
|
35
|
-
[Java](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis), [C++](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-CPP),
|
|
35
|
+
[Java](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis), [C++](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-CPP), [C](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-C),
|
|
36
36
|
[Swift](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Swift), [Cython](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-Cy),
|
|
37
37
|
or [C#](https://github.com/starlangsoftware/TurkishMorphologicalAnalysis-CS) repository.
|
|
38
38
|
|
|
@@ -1,4 +1,11 @@
|
|
|
1
1
|
import { Corpus } from "nlptoolkit-corpus/dist/Corpus";
|
|
2
2
|
export declare class DisambiguationCorpus extends Corpus {
|
|
3
|
+
/**
|
|
4
|
+
* Constructor which takes a file name {@link String} as an input and reads the file line by line. It takes each word of the line,
|
|
5
|
+
* and creates a new {@link DisambiguatedWord} with current word and its {@link MorphologicalParse}. It also creates a new {@link Sentence}
|
|
6
|
+
* when a new sentence starts, and adds each word to this sentence till the end of that sentence.
|
|
7
|
+
*
|
|
8
|
+
* @param fileName File which will be read and parsed.
|
|
9
|
+
*/
|
|
3
10
|
constructor(fileName?: string);
|
|
4
11
|
}
|
|
@@ -16,6 +16,13 @@
|
|
|
16
16
|
const MorphologicalParse_1 = require("../MorphologicalAnalysis/MorphologicalParse");
|
|
17
17
|
const Sentence_1 = require("nlptoolkit-corpus/dist/Sentence");
|
|
18
18
|
class DisambiguationCorpus extends Corpus_1.Corpus {
|
|
19
|
+
/**
|
|
20
|
+
* Constructor which takes a file name {@link String} as an input and reads the file line by line. It takes each word of the line,
|
|
21
|
+
* and creates a new {@link DisambiguatedWord} with current word and its {@link MorphologicalParse}. It also creates a new {@link Sentence}
|
|
22
|
+
* when a new sentence starts, and adds each word to this sentence till the end of that sentence.
|
|
23
|
+
*
|
|
24
|
+
* @param fileName File which will be read and parsed.
|
|
25
|
+
*/
|
|
19
26
|
constructor(fileName) {
|
|
20
27
|
super();
|
|
21
28
|
if (fileName != undefined) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"DisambiguationCorpus.js","sourceRoot":"","sources":["../../source/Corpus/DisambiguationCorpus.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,0DAAqD;IACrD,yBAAyB;IACzB,2DAAsD;IACtD,oFAA+E;IAC/E,8DAAyD;IAEzD,MAAa,oBAAqB,SAAQ,eAAM;QAE5C,YAAY,QAAiB;YACzB,KAAK,EAAE,CAAC;YACR,IAAI,QAAQ,IAAI,SAAS,EAAC;gBACtB,IAAI,WAAW,GAAG,SAAS,CAAC;gBAC5B,IAAI,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;gBAC5C,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;gBAC5B,KAAK,IAAI,IAAI,IAAI,KAAK,EAAE;oBACpB,IAAI,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;oBACjD,IAAI,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBACnD,IAAI,IAAI,IAAI,EAAE,IAAI,KAAK,IAAI,EAAE,EAAE;wBAC3B,IAAI,OAAO,GAAG,IAAI,qCAAiB,CAAC,IAAI,EAAE,IAAI,uCAAkB,CAAC,KAAK,CAAC,CAAC,CAAC;wBACzE,IAAI,IAAI,IAAI,KAAK,EAAE;4BACf,WAAW,GAAG,IAAI,mBAAQ,EAAE,CAAC;yBAChC;6BAAM;4BACH,IAAI,IAAI,IAAI,MAAM,EAAE;gCAChB,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;6BACjC;iCAAM;gCACH,IAAI,IAAI,IAAI,OAAO,IAAI,IAAI,IAAI,QAAQ,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI,IAAI,UAAU,EAAE;iCACnF;qCAAM;oCACH,IAAI,WAAW,IAAI,IAAI,EAAE;wCACrB,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;qCAChC;iCACJ;6BACJ;yBACJ;qBACJ;iBACJ;aACJ;QACL,CAAC;KAEJ;
|
|
1
|
+
{"version":3,"file":"DisambiguationCorpus.js","sourceRoot":"","sources":["../../source/Corpus/DisambiguationCorpus.ts"],"names":[],"mappings":";;;;;;;;;;;;IAAA,0DAAqD;IACrD,yBAAyB;IACzB,2DAAsD;IACtD,oFAA+E;IAC/E,8DAAyD;IAEzD,MAAa,oBAAqB,SAAQ,eAAM;QAE5C;;;;;;WAMG;QACH,YAAY,QAAiB;YACzB,KAAK,EAAE,CAAC;YACR,IAAI,QAAQ,IAAI,SAAS,EAAC;gBACtB,IAAI,WAAW,GAAG,SAAS,CAAC;gBAC5B,IAAI,IAAI,GAAG,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;gBAC5C,IAAI,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;gBAC5B,KAAK,IAAI,IAAI,IAAI,KAAK,EAAE;oBACpB,IAAI,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;oBACjD,IAAI,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBACnD,IAAI,IAAI,IAAI,EAAE,IAAI,KAAK,IAAI,EAAE,EAAE;wBAC3B,IAAI,OAAO,GAAG,IAAI,qCAAiB,CAAC,IAAI,EAAE,IAAI,uCAAkB,CAAC,KAAK,CAAC,CAAC,CAAC;wBACzE,IAAI,IAAI,IAAI,KAAK,EAAE;4BACf,WAAW,GAAG,IAAI,mBAAQ,EAAE,CAAC;yBAChC;6BAAM;4BACH,IAAI,IAAI,IAAI,MAAM,EAAE;gCAChB,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;6BACjC;iCAAM;gCACH,IAAI,IAAI,IAAI,OAAO,IAAI,IAAI,IAAI,QAAQ,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI,IAAI,UAAU,EAAE;iCACnF;qCAAM;oCACH,IAAI,WAAW,IAAI,IAAI,EAAE;wCACrB,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;qCAChC;iCACJ;6BACJ;yBACJ;qBACJ;iBACJ;aACJ;QACL,CAAC;KAEJ;IAvCD,oDAuCC"}
|
|
@@ -8,11 +8,13 @@ import { FsmParse } from "./FsmParse";
|
|
|
8
8
|
import { Sentence } from "nlptoolkit-corpus/dist/Sentence";
|
|
9
9
|
export declare class FsmMorphologicalAnalyzer {
|
|
10
10
|
private dictionaryTrie;
|
|
11
|
+
private suffixTrie;
|
|
11
12
|
private parsedSurfaceForms;
|
|
12
|
-
private
|
|
13
|
+
private pronunciations;
|
|
14
|
+
private readonly finiteStateMachine;
|
|
13
15
|
private static MAX_DISTANCE;
|
|
14
|
-
private dictionary;
|
|
15
|
-
private cache;
|
|
16
|
+
private readonly dictionary;
|
|
17
|
+
private readonly cache;
|
|
16
18
|
private mostUsedPatterns;
|
|
17
19
|
/**
|
|
18
20
|
* Another constructor of FsmMorphologicalAnalyzer class. It generates a new TxtDictionary type dictionary from
|
|
@@ -23,7 +25,29 @@ export declare class FsmMorphologicalAnalyzer {
|
|
|
23
25
|
* @param cacheSize the size of the LRUCache.
|
|
24
26
|
*/
|
|
25
27
|
constructor(fileName?: string, dictionaryFileNameOrDictionary?: any, cacheSize?: number);
|
|
28
|
+
/**
|
|
29
|
+
* Constructs and returns the reverse string of a given string.
|
|
30
|
+
* @param s String to be reversed.
|
|
31
|
+
* @return Reverse of a given string.
|
|
32
|
+
*/
|
|
33
|
+
private reverseString;
|
|
34
|
+
/**
|
|
35
|
+
* Constructs the suffix trie from the input file suffixes.txt. suffixes.txt contains the most frequent 6000
|
|
36
|
+
* suffixes that a verb or a noun can take. The suffix trie is a trie that stores these suffixes in reverse form,
|
|
37
|
+
* which can be then used to match a given word for its possible suffix content.
|
|
38
|
+
*/
|
|
39
|
+
private prepareSuffixTrie;
|
|
40
|
+
/**
|
|
41
|
+
* Reads the file for correct surface forms and their most frequent root forms, in other words, the surface forms
|
|
42
|
+
* which have at least one morphological analysis in Turkish.
|
|
43
|
+
* @param fileName Input file containing analyzable surface forms and their root forms.
|
|
44
|
+
*/
|
|
26
45
|
addParsedSurfaceForms(fileName: string): void;
|
|
46
|
+
/**
|
|
47
|
+
* Reads the file for foreign words and their pronunciations.
|
|
48
|
+
* @param fileName Input file containing foreign words and their pronunciations.
|
|
49
|
+
*/
|
|
50
|
+
addPronunciations(fileName: string): void;
|
|
27
51
|
/**
|
|
28
52
|
* The getPossibleWords method takes {@link MorphologicalParse} and {@link MetamorphicParse} as input.
|
|
29
53
|
* First it determines whether the given morphologicalParse is the root verb and whether it contains a verb tag.
|
|
@@ -316,6 +340,15 @@ export declare class FsmMorphologicalAnalyzer {
|
|
|
316
340
|
* @return ArrayList type initialFsmParse which holds the analyses.
|
|
317
341
|
*/
|
|
318
342
|
analysis(surfaceForm: string, isProper: boolean): Array<FsmParse>;
|
|
343
|
+
/**
|
|
344
|
+
* This method uses cache idea to speed up pattern matching in Fsm. mostUsedPatterns stores the compiled forms of
|
|
345
|
+
* the previously used patterns. When Fsm tries to match a string to a pattern, first we check if it exists in
|
|
346
|
+
* mostUsedPatterns. If it exists, we directly use the compiled pattern to match the string. Otherwise, new pattern
|
|
347
|
+
* is compiled and put in the mostUsedPatterns.
|
|
348
|
+
* @param expr Pattern to check
|
|
349
|
+
* @param value String to match the pattern
|
|
350
|
+
* @return True if the string matches the pattern, false otherwise.
|
|
351
|
+
*/
|
|
319
352
|
private patternMatches;
|
|
320
353
|
/**
|
|
321
354
|
* The isProperNoun method takes surfaceForm String as input and checks its each char whether they are in the range
|
|
@@ -332,6 +365,20 @@ export declare class FsmMorphologicalAnalyzer {
|
|
|
332
365
|
* @return true if it is a code-like word, return false otherwise.
|
|
333
366
|
*/
|
|
334
367
|
isCode(surfaceForm: string): boolean;
|
|
368
|
+
/**
|
|
369
|
+
* Identifies a possible new root word for a given surface form. It also adds the new root form to the dictionary
|
|
370
|
+
* for further usage. The method first searches the suffix trie for the reverse string of the surface form. This
|
|
371
|
+
* way, it can identify if the word has a suffix that is in the most frequently used suffix list. Since a word can
|
|
372
|
+
* have multiple possible suffixes, the method identifies the longest suffix and returns the substring of the
|
|
373
|
+
* surface form that does not contain the suffix. Let's say the word is 'googlelaştırdık', it will identify 'dık' as
|
|
374
|
+
* a suffix and will return 'googlelaştır' as a possible root form. Another example will be 'homelesslerimizle', it
|
|
375
|
+
* will identify 'lerimizle' as suffix and will return 'homeless' as a possible root form. If the root word ends
|
|
376
|
+
* with 'ğ', it is replaced with 'k'. 'morfolojikliğini' will return 'morfolojikliğ', which will then be replaced
|
|
377
|
+
* with 'morfolojiklik'.
|
|
378
|
+
* @param surfaceForm Surface form for which we will identify a possible new root form.
|
|
379
|
+
* @return Possible new root form.
|
|
380
|
+
*/
|
|
381
|
+
private rootOfPossiblyNewWord;
|
|
335
382
|
/**
|
|
336
383
|
* The robustMorphologicalAnalysis is used to analyse surfaceForm String. First it gets the currentParse of the surfaceForm
|
|
337
384
|
* then, if the size of the currentParse is 0, and given surfaceForm is a proper noun, it adds the surfaceForm
|
|
@@ -379,9 +426,30 @@ export declare class FsmMorphologicalAnalyzer {
|
|
|
379
426
|
* @return true if surfaceForm matches with the regex.
|
|
380
427
|
*/
|
|
381
428
|
private isNumber;
|
|
429
|
+
/**
|
|
430
|
+
* Checks if a given surface form matches to a percent value. It should be something like %4, %45, %4.3 or %56.786
|
|
431
|
+
* @param surfaceForm Surface form to be checked.
|
|
432
|
+
* @return True if the surface form is in percent form
|
|
433
|
+
*/
|
|
382
434
|
private isPercent;
|
|
435
|
+
/**
|
|
436
|
+
* Checks if a given surface form matches to a time form. It should be something like 3:34, 12:56 etc.
|
|
437
|
+
* @param surfaceForm Surface form to be checked.
|
|
438
|
+
* @return True if the surface form is in time form
|
|
439
|
+
*/
|
|
383
440
|
private isTime;
|
|
441
|
+
/**
|
|
442
|
+
* Checks if a given surface form matches to a range form. It should be something like 123-1400 or 12:34-15:78 or
|
|
443
|
+
* 3.45-4.67.
|
|
444
|
+
* @param surfaceForm Surface form to be checked.
|
|
445
|
+
* @return True if the surface form is in range form
|
|
446
|
+
*/
|
|
384
447
|
private isRange;
|
|
448
|
+
/**
|
|
449
|
+
* Checks if a given surface form matches to a date form. It should be something like 3/10/2023 or 2.3.2012
|
|
450
|
+
* @param surfaceForm Surface form to be checked.
|
|
451
|
+
* @return True if the surface form is in date form
|
|
452
|
+
*/
|
|
385
453
|
private isDate;
|
|
386
454
|
/**
|
|
387
455
|
* The morphologicalAnalysis method is used to analyse a FsmParseList by comparing with the regex.
|
|
@@ -4,12 +4,13 @@
|
|
|
4
4
|
if (v !== undefined) module.exports = v;
|
|
5
5
|
}
|
|
6
6
|
else if (typeof define === "function" && define.amd) {
|
|
7
|
-
define(["require", "exports", "./FiniteStateMachine", "nlptoolkit-dictionary/dist/Dictionary/TxtDictionary", "nlptoolkit-datastructure/dist/LRUCache", "./FsmParseList", "nlptoolkit-dictionary/dist/Dictionary/WordComparator", "fs", "./Transition", "./MorphologicalTag", "nlptoolkit-dictionary/dist/Dictionary/TxtWord", "./FsmParse", "nlptoolkit-corpus/dist/Sentence", "nlptoolkit-dictionary/dist/Dictionary/Word", "./State", "nlptoolkit-datastructure/dist/Queue"], factory);
|
|
7
|
+
define(["require", "exports", "nlptoolkit-dictionary/dist/Dictionary/Trie/Trie", "./FiniteStateMachine", "nlptoolkit-dictionary/dist/Dictionary/TxtDictionary", "nlptoolkit-datastructure/dist/LRUCache", "./FsmParseList", "nlptoolkit-dictionary/dist/Dictionary/WordComparator", "fs", "./Transition", "./MorphologicalTag", "nlptoolkit-dictionary/dist/Dictionary/TxtWord", "./FsmParse", "nlptoolkit-corpus/dist/Sentence", "nlptoolkit-dictionary/dist/Dictionary/Word", "./State", "nlptoolkit-datastructure/dist/Queue", "nlptoolkit-util/dist/FileUtils"], factory);
|
|
8
8
|
}
|
|
9
9
|
})(function (require, exports) {
|
|
10
10
|
"use strict";
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
12
|
exports.FsmMorphologicalAnalyzer = void 0;
|
|
13
|
+
const Trie_1 = require("nlptoolkit-dictionary/dist/Dictionary/Trie/Trie");
|
|
13
14
|
const FiniteStateMachine_1 = require("./FiniteStateMachine");
|
|
14
15
|
const TxtDictionary_1 = require("nlptoolkit-dictionary/dist/Dictionary/TxtDictionary");
|
|
15
16
|
const LRUCache_1 = require("nlptoolkit-datastructure/dist/LRUCache");
|
|
@@ -24,6 +25,7 @@
|
|
|
24
25
|
const Word_1 = require("nlptoolkit-dictionary/dist/Dictionary/Word");
|
|
25
26
|
const State_1 = require("./State");
|
|
26
27
|
const Queue_1 = require("nlptoolkit-datastructure/dist/Queue");
|
|
28
|
+
const FileUtils_1 = require("nlptoolkit-util/dist/FileUtils");
|
|
27
29
|
class FsmMorphologicalAnalyzer {
|
|
28
30
|
/**
|
|
29
31
|
* Another constructor of FsmMorphologicalAnalyzer class. It generates a new TxtDictionary type dictionary from
|
|
@@ -35,6 +37,7 @@
|
|
|
35
37
|
*/
|
|
36
38
|
constructor(fileName, dictionaryFileNameOrDictionary, cacheSize) {
|
|
37
39
|
this.parsedSurfaceForms = undefined;
|
|
40
|
+
this.pronunciations = undefined;
|
|
38
41
|
this.cache = undefined;
|
|
39
42
|
this.mostUsedPatterns = new Map();
|
|
40
43
|
if (dictionaryFileNameOrDictionary == undefined) {
|
|
@@ -54,20 +57,54 @@
|
|
|
54
57
|
else {
|
|
55
58
|
this.finiteStateMachine = new FiniteStateMachine_1.FiniteStateMachine(fileName);
|
|
56
59
|
}
|
|
60
|
+
this.prepareSuffixTrie();
|
|
57
61
|
this.dictionaryTrie = this.dictionary.prepareTrie();
|
|
58
62
|
if (cacheSize > 0) {
|
|
59
63
|
this.cache = new LRUCache_1.LRUCache(cacheSize);
|
|
60
64
|
}
|
|
65
|
+
this.addPronunciations("pronunciations.txt");
|
|
61
66
|
}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
67
|
+
/**
|
|
68
|
+
* Constructs and returns the reverse string of a given string.
|
|
69
|
+
* @param s String to be reversed.
|
|
70
|
+
* @return Reverse of a given string.
|
|
71
|
+
*/
|
|
72
|
+
reverseString(s) {
|
|
73
|
+
let result = "";
|
|
74
|
+
for (let i = s.length - 1; i >= 0; i--) {
|
|
75
|
+
result += s[i];
|
|
76
|
+
}
|
|
77
|
+
return result;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Constructs the suffix trie from the input file suffixes.txt. suffixes.txt contains the most frequent 6000
|
|
81
|
+
* suffixes that a verb or a noun can take. The suffix trie is a trie that stores these suffixes in reverse form,
|
|
82
|
+
* which can be then used to match a given word for its possible suffix content.
|
|
83
|
+
*/
|
|
84
|
+
prepareSuffixTrie() {
|
|
85
|
+
this.suffixTrie = new Trie_1.Trie();
|
|
86
|
+
let data = fs.readFileSync("suffixes.txt", 'utf8');
|
|
65
87
|
let lines = data.split("\n");
|
|
66
|
-
for (let
|
|
67
|
-
let
|
|
68
|
-
this.
|
|
88
|
+
for (let suffix of lines) {
|
|
89
|
+
let reverseSuffix = this.reverseString(suffix);
|
|
90
|
+
this.suffixTrie.addWord(reverseSuffix, new Word_1.Word(reverseSuffix));
|
|
69
91
|
}
|
|
70
92
|
}
|
|
93
|
+
/**
|
|
94
|
+
* Reads the file for correct surface forms and their most frequent root forms, in other words, the surface forms
|
|
95
|
+
* which have at least one morphological analysis in Turkish.
|
|
96
|
+
* @param fileName Input file containing analyzable surface forms and their root forms.
|
|
97
|
+
*/
|
|
98
|
+
addParsedSurfaceForms(fileName) {
|
|
99
|
+
this.parsedSurfaceForms = FileUtils_1.FileUtils.readHashMap(fileName);
|
|
100
|
+
}
|
|
101
|
+
/**
|
|
102
|
+
* Reads the file for foreign words and their pronunciations.
|
|
103
|
+
* @param fileName Input file containing foreign words and their pronunciations.
|
|
104
|
+
*/
|
|
105
|
+
addPronunciations(fileName) {
|
|
106
|
+
this.pronunciations = FileUtils_1.FileUtils.readHashMap(fileName);
|
|
107
|
+
}
|
|
71
108
|
/**
|
|
72
109
|
* The getPossibleWords method takes {@link MorphologicalParse} and {@link MetamorphicParse} as input.
|
|
73
110
|
* First it determines whether the given morphologicalParse is the root verb and whether it contains a verb tag.
|
|
@@ -659,7 +696,7 @@
|
|
|
659
696
|
*/
|
|
660
697
|
parseWordLength(fsmParse, maxLength) {
|
|
661
698
|
let result = new Array();
|
|
662
|
-
let
|
|
699
|
+
let resultTransitionList = new Array();
|
|
663
700
|
let parseQueue = new Queue_1.Queue(1000);
|
|
664
701
|
parseQueue.enqueueAll(fsmParse);
|
|
665
702
|
while (!parseQueue.isEmpty()) {
|
|
@@ -669,11 +706,11 @@
|
|
|
669
706
|
let currentState = currentFsmParse.getFinalSuffix();
|
|
670
707
|
let currentSurfaceForm = currentFsmParse.getSurfaceForm();
|
|
671
708
|
if (currentState.isEndState() && currentSurfaceForm.length <= maxLength) {
|
|
672
|
-
let
|
|
673
|
-
if (!
|
|
709
|
+
let currentTransitionList = currentSurfaceForm + " " + currentFsmParse.getFsmParseTransitionList();
|
|
710
|
+
if (!resultTransitionList.includes(currentTransitionList)) {
|
|
674
711
|
result.push(currentFsmParse);
|
|
675
712
|
currentFsmParse.constructInflectionalGroups();
|
|
676
|
-
|
|
713
|
+
resultTransitionList.push(currentTransitionList);
|
|
677
714
|
}
|
|
678
715
|
}
|
|
679
716
|
this.addNewParsesFromCurrentParseLength(currentFsmParse, parseQueue, maxLength, root);
|
|
@@ -690,7 +727,7 @@
|
|
|
690
727
|
*/
|
|
691
728
|
parseWordSurfaceForm(fsmParse, surfaceForm) {
|
|
692
729
|
let result = new Array();
|
|
693
|
-
let
|
|
730
|
+
let resultTransitionList = new Array();
|
|
694
731
|
let parseQueue = new Queue_1.Queue(1000);
|
|
695
732
|
parseQueue.enqueueAll(fsmParse);
|
|
696
733
|
while (!parseQueue.isEmpty()) {
|
|
@@ -700,11 +737,11 @@
|
|
|
700
737
|
let currentState = currentFsmParse.getFinalSuffix();
|
|
701
738
|
let currentSurfaceForm = currentFsmParse.getSurfaceForm();
|
|
702
739
|
if (currentState.isEndState() && currentSurfaceForm == surfaceForm) {
|
|
703
|
-
let
|
|
704
|
-
if (!
|
|
740
|
+
let currentTransitionList = currentFsmParse.getFsmParseTransitionList();
|
|
741
|
+
if (!resultTransitionList.includes(currentTransitionList)) {
|
|
705
742
|
result.push(currentFsmParse);
|
|
706
743
|
currentFsmParse.constructInflectionalGroups();
|
|
707
|
-
|
|
744
|
+
resultTransitionList.push(currentTransitionList);
|
|
708
745
|
}
|
|
709
746
|
}
|
|
710
747
|
this.addNewParsesFromCurrentParseSurfaceForm(currentFsmParse, parseQueue, surfaceForm, root);
|
|
@@ -968,6 +1005,15 @@
|
|
|
968
1005
|
initialFsmParse = this.initializeParseListFromSurfaceForm(surfaceForm, isProper);
|
|
969
1006
|
return this.parseWordSurfaceForm(initialFsmParse, surfaceForm);
|
|
970
1007
|
}
|
|
1008
|
+
/**
|
|
1009
|
+
* This method uses cache idea to speed up pattern matching in Fsm. mostUsedPatterns stores the compiled forms of
|
|
1010
|
+
* the previously used patterns. When Fsm tries to match a string to a pattern, first we check if it exists in
|
|
1011
|
+
* mostUsedPatterns. If it exists, we directly use the compiled pattern to match the string. Otherwise, new pattern
|
|
1012
|
+
* is compiled and put in the mostUsedPatterns.
|
|
1013
|
+
* @param expr Pattern to check
|
|
1014
|
+
* @param value String to match the pattern
|
|
1015
|
+
* @return True if the string matches the pattern, false otherwise.
|
|
1016
|
+
*/
|
|
971
1017
|
patternMatches(expr, value) {
|
|
972
1018
|
let p = this.mostUsedPatterns.get(expr);
|
|
973
1019
|
if (p == undefined) {
|
|
@@ -1003,6 +1049,40 @@
|
|
|
1003
1049
|
}
|
|
1004
1050
|
return this.patternMatches("^.*[0-9].*$", surfaceForm) && this.patternMatches("^.*[a-zA-ZçöğüşıÇÖĞÜŞİ].*$", surfaceForm);
|
|
1005
1051
|
}
|
|
1052
|
+
/**
|
|
1053
|
+
* Identifies a possible new root word for a given surface form. It also adds the new root form to the dictionary
|
|
1054
|
+
* for further usage. The method first searches the suffix trie for the reverse string of the surface form. This
|
|
1055
|
+
* way, it can identify if the word has a suffix that is in the most frequently used suffix list. Since a word can
|
|
1056
|
+
* have multiple possible suffixes, the method identifies the longest suffix and returns the substring of the
|
|
1057
|
+
* surface form that does not contain the suffix. Let's say the word is 'googlelaştırdık', it will identify 'dık' as
|
|
1058
|
+
* a suffix and will return 'googlelaştır' as a possible root form. Another example will be 'homelesslerimizle', it
|
|
1059
|
+
* will identify 'lerimizle' as suffix and will return 'homeless' as a possible root form. If the root word ends
|
|
1060
|
+
* with 'ğ', it is replaced with 'k'. 'morfolojikliğini' will return 'morfolojikliğ', which will then be replaced
|
|
1061
|
+
* with 'morfolojiklik'.
|
|
1062
|
+
* @param surfaceForm Surface form for which we will identify a possible new root form.
|
|
1063
|
+
* @return Possible new root form.
|
|
1064
|
+
*/
|
|
1065
|
+
rootOfPossiblyNewWord(surfaceForm) {
|
|
1066
|
+
let words = this.suffixTrie.getWordsWithPrefix(this.reverseString(surfaceForm));
|
|
1067
|
+
let candidateWord = null;
|
|
1068
|
+
let candidateList = new Array();
|
|
1069
|
+
for (let word of words) {
|
|
1070
|
+
candidateWord = surfaceForm.substring(0, surfaceForm.length - word.getName().length);
|
|
1071
|
+
let newWord;
|
|
1072
|
+
if (candidateWord.endsWith("ğ")) {
|
|
1073
|
+
candidateWord = candidateWord.substring(0, candidateWord.length - 1) + "k";
|
|
1074
|
+
newWord = new TxtWord_1.TxtWord(candidateWord, "CL_ISIM");
|
|
1075
|
+
newWord.addFlag("IS_SD");
|
|
1076
|
+
}
|
|
1077
|
+
else {
|
|
1078
|
+
newWord = new TxtWord_1.TxtWord(candidateWord, "CL_ISIM");
|
|
1079
|
+
newWord.addFlag("CL_FIIL");
|
|
1080
|
+
}
|
|
1081
|
+
candidateList.push(newWord);
|
|
1082
|
+
this.dictionaryTrie.addWord(candidateWord, newWord);
|
|
1083
|
+
}
|
|
1084
|
+
return candidateList;
|
|
1085
|
+
}
|
|
1006
1086
|
/**
|
|
1007
1087
|
* The robustMorphologicalAnalysis is used to analyse surfaceForm String. First it gets the currentParse of the surfaceForm
|
|
1008
1088
|
* then, if the size of the currentParse is 0, and given surfaceForm is a proper noun, it adds the surfaceForm
|
|
@@ -1022,14 +1102,17 @@
|
|
|
1022
1102
|
if (this.isProperNoun(surfaceForm)) {
|
|
1023
1103
|
fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("ProperRoot")));
|
|
1024
1104
|
}
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1105
|
+
if (this.isCode(surfaceForm)) {
|
|
1106
|
+
fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("CodeRoot")));
|
|
1107
|
+
}
|
|
1108
|
+
let newCandidateList = this.rootOfPossiblyNewWord(surfaceForm);
|
|
1109
|
+
if (newCandidateList.length != 0) {
|
|
1110
|
+
for (let word of newCandidateList) {
|
|
1111
|
+
fsmParse.push(new FsmParse_1.FsmParse(word, this.finiteStateMachine.getState("VerbalRoot")));
|
|
1112
|
+
fsmParse.push(new FsmParse_1.FsmParse(word, this.finiteStateMachine.getState("NominalRoot")));
|
|
1031
1113
|
}
|
|
1032
1114
|
}
|
|
1115
|
+
fsmParse.push(new FsmParse_1.FsmParse(surfaceForm, this.finiteStateMachine.getState("NominalRoot")));
|
|
1033
1116
|
return new FsmParseList_1.FsmParseList(this.parseWordSurfaceForm(fsmParse, surfaceForm));
|
|
1034
1117
|
}
|
|
1035
1118
|
else {
|
|
@@ -1135,19 +1218,40 @@
|
|
|
1135
1218
|
}
|
|
1136
1219
|
return word == "" && count > 1;
|
|
1137
1220
|
}
|
|
1221
|
+
/**
|
|
1222
|
+
* Checks if a given surface form matches to a percent value. It should be something like %4, %45, %4.3 or %56.786
|
|
1223
|
+
* @param surfaceForm Surface form to be checked.
|
|
1224
|
+
* @return True if the surface form is in percent form
|
|
1225
|
+
*/
|
|
1138
1226
|
isPercent(surfaceForm) {
|
|
1139
1227
|
return this.patternMatches("^%(\\d\\d|\\d)$", surfaceForm) ||
|
|
1140
1228
|
this.patternMatches("^%(\\d\\d|\\d)\\.\\d+$", surfaceForm);
|
|
1141
1229
|
}
|
|
1230
|
+
/**
|
|
1231
|
+
* Checks if a given surface form matches to a time form. It should be something like 3:34, 12:56 etc.
|
|
1232
|
+
* @param surfaceForm Surface form to be checked.
|
|
1233
|
+
* @return True if the surface form is in time form
|
|
1234
|
+
*/
|
|
1142
1235
|
isTime(surfaceForm) {
|
|
1143
1236
|
return this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm) ||
|
|
1144
1237
|
this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm);
|
|
1145
1238
|
}
|
|
1239
|
+
/**
|
|
1240
|
+
* Checks if a given surface form matches to a range form. It should be something like 123-1400 or 12:34-15:78 or
|
|
1241
|
+
* 3.45-4.67.
|
|
1242
|
+
* @param surfaceForm Surface form to be checked.
|
|
1243
|
+
* @return True if the surface form is in range form
|
|
1244
|
+
*/
|
|
1146
1245
|
isRange(surfaceForm) {
|
|
1147
1246
|
return this.patternMatches("^\\d+-\\d+$", surfaceForm) ||
|
|
1148
1247
|
this.patternMatches("^(\\d\\d|\\d):(\\d\\d|\\d)-(\\d\\d|\\d):(\\d\\d|\\d)$", surfaceForm) ||
|
|
1149
1248
|
this.patternMatches("^(\\d\\d|\\d)\\.(\\d\\d|\\d)-(\\d\\d|\\d)\\.(\\d\\d|\\d)$", surfaceForm);
|
|
1150
1249
|
}
|
|
1250
|
+
/**
|
|
1251
|
+
* Checks if a given surface form matches to a date form. It should be something like 3/10/2023 or 2.3.2012
|
|
1252
|
+
* @param surfaceForm Surface form to be checked.
|
|
1253
|
+
* @return True if the surface form is in date form
|
|
1254
|
+
*/
|
|
1151
1255
|
isDate(surfaceForm) {
|
|
1152
1256
|
return this.patternMatches("^(\\d\\d|\\d)/(\\d\\d|\\d)/\\d+$", surfaceForm) ||
|
|
1153
1257
|
this.patternMatches("^(\\d\\d|\\d)\\.(\\d\\d|\\d)\\.\\d+$", surfaceForm);
|
|
@@ -1171,6 +1275,8 @@
|
|
|
1171
1275
|
*/
|
|
1172
1276
|
morphologicalAnalysis(surfaceForm) {
|
|
1173
1277
|
let lowerCased = surfaceForm.toLocaleLowerCase("tr");
|
|
1278
|
+
let possibleRootLowerCased = "", pronunciation = "";
|
|
1279
|
+
let isRootReplaced = false;
|
|
1174
1280
|
if (this.parsedSurfaceForms != undefined && this.parsedSurfaceForms.has(lowerCased) &&
|
|
1175
1281
|
!this.isInteger(surfaceForm) && !this.isDouble(surfaceForm) && !this.isPercent(surfaceForm) &&
|
|
1176
1282
|
!this.isTime(surfaceForm) && !this.isRange(surfaceForm) && !this.isDate(surfaceForm)) {
|
|
@@ -1182,9 +1288,9 @@
|
|
|
1182
1288
|
return this.cache.get(surfaceForm);
|
|
1183
1289
|
}
|
|
1184
1290
|
if (this.patternMatches("^(\\w|Ç|Ş|İ|Ü|Ö)\\.$", surfaceForm)) {
|
|
1185
|
-
this.dictionaryTrie.addWord(
|
|
1291
|
+
this.dictionaryTrie.addWord(lowerCased, new TxtWord_1.TxtWord(lowerCased, "IS_OA"));
|
|
1186
1292
|
}
|
|
1187
|
-
let defaultFsmParse = this.analysis(
|
|
1293
|
+
let defaultFsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
|
|
1188
1294
|
if (defaultFsmParse.length > 0) {
|
|
1189
1295
|
let fsmParseList = new FsmParseList_1.FsmParseList(defaultFsmParse);
|
|
1190
1296
|
if (this.cache != undefined) {
|
|
@@ -1198,57 +1304,69 @@
|
|
|
1198
1304
|
if (possibleRoot != "") {
|
|
1199
1305
|
if (possibleRoot.includes("/") || possibleRoot.includes("\\/")) {
|
|
1200
1306
|
this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_KESIR"));
|
|
1201
|
-
fsmParse = this.analysis(
|
|
1307
|
+
fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
|
|
1202
1308
|
}
|
|
1203
1309
|
else {
|
|
1204
1310
|
if (this.isDate(possibleRoot)) {
|
|
1205
1311
|
this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_DATE"));
|
|
1206
|
-
fsmParse = this.analysis(
|
|
1312
|
+
fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
|
|
1207
1313
|
}
|
|
1208
1314
|
else {
|
|
1209
1315
|
if (this.patternMatches("^\\d+/\\d+$", possibleRoot)) {
|
|
1210
1316
|
this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_KESIR"));
|
|
1211
|
-
fsmParse = this.analysis(
|
|
1317
|
+
fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
|
|
1212
1318
|
}
|
|
1213
1319
|
else {
|
|
1214
1320
|
if (this.isPercent(possibleRoot)) {
|
|
1215
1321
|
this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_PERCENT"));
|
|
1216
|
-
fsmParse = this.analysis(
|
|
1322
|
+
fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
|
|
1217
1323
|
}
|
|
1218
1324
|
else {
|
|
1219
1325
|
if (this.isTime(surfaceForm)) {
|
|
1220
1326
|
this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_ZAMAN"));
|
|
1221
|
-
fsmParse = this.analysis(
|
|
1327
|
+
fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
|
|
1222
1328
|
}
|
|
1223
1329
|
else {
|
|
1224
1330
|
if (this.isRange(surfaceForm)) {
|
|
1225
1331
|
this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_RANGE"));
|
|
1226
|
-
fsmParse = this.analysis(
|
|
1332
|
+
fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
|
|
1227
1333
|
}
|
|
1228
1334
|
else {
|
|
1229
1335
|
if (this.isInteger(possibleRoot)) {
|
|
1230
1336
|
this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_SAYI"));
|
|
1231
|
-
fsmParse = this.analysis(
|
|
1337
|
+
fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
|
|
1232
1338
|
}
|
|
1233
1339
|
else {
|
|
1234
1340
|
if (this.isDouble(possibleRoot)) {
|
|
1235
1341
|
this.dictionaryTrie.addWord(possibleRoot, new TxtWord_1.TxtWord(possibleRoot, "IS_REELSAYI"));
|
|
1236
|
-
fsmParse = this.analysis(
|
|
1342
|
+
fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
|
|
1237
1343
|
}
|
|
1238
1344
|
else {
|
|
1239
|
-
if (Word_1.Word.isCapital(possibleRoot)) {
|
|
1345
|
+
if (Word_1.Word.isCapital(possibleRoot) || "QXW".includes(possibleRoot.substring(0, 1))) {
|
|
1240
1346
|
let newWord = undefined;
|
|
1241
|
-
|
|
1242
|
-
|
|
1347
|
+
possibleRootLowerCased = possibleRoot.toLocaleLowerCase("tr");
|
|
1348
|
+
if (this.pronunciations.has(possibleRootLowerCased)) {
|
|
1349
|
+
isRootReplaced = true;
|
|
1350
|
+
pronunciation = this.pronunciations.get(possibleRootLowerCased);
|
|
1351
|
+
if (this.dictionary.getWord(pronunciation) != null) {
|
|
1352
|
+
this.dictionary.getWord(pronunciation).addFlag("IS_OA");
|
|
1353
|
+
}
|
|
1354
|
+
else {
|
|
1355
|
+
newWord = new TxtWord_1.TxtWord(pronunciation, "IS_OA");
|
|
1356
|
+
this.dictionaryTrie.addWord(pronunciation, newWord);
|
|
1357
|
+
}
|
|
1358
|
+
let replacedWord = pronunciation + lowerCased.substring(possibleRootLowerCased.length);
|
|
1359
|
+
fsmParse = this.analysis(replacedWord, this.isProperNoun(surfaceForm));
|
|
1243
1360
|
}
|
|
1244
1361
|
else {
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1362
|
+
if (this.dictionary.getWord(possibleRootLowerCased) != null) {
|
|
1363
|
+
this.dictionary.getWord(possibleRootLowerCased).addFlag("IS_OA");
|
|
1364
|
+
}
|
|
1365
|
+
else {
|
|
1366
|
+
newWord = new TxtWord_1.TxtWord(possibleRootLowerCased, "IS_OA");
|
|
1367
|
+
this.dictionaryTrie.addWord(possibleRootLowerCased, newWord);
|
|
1368
|
+
}
|
|
1369
|
+
fsmParse = this.analysis(lowerCased, this.isProperNoun(surfaceForm));
|
|
1252
1370
|
}
|
|
1253
1371
|
}
|
|
1254
1372
|
}
|
|
@@ -1261,6 +1379,11 @@
|
|
|
1261
1379
|
}
|
|
1262
1380
|
}
|
|
1263
1381
|
}
|
|
1382
|
+
if (!isRootReplaced) {
|
|
1383
|
+
for (let parse of fsmParse) {
|
|
1384
|
+
parse.restoreOriginalForm(possibleRootLowerCased, pronunciation);
|
|
1385
|
+
}
|
|
1386
|
+
}
|
|
1264
1387
|
let fsmParseList = new FsmParseList_1.FsmParseList(fsmParse);
|
|
1265
1388
|
if (this.cache != undefined && fsmParseList.size() > 0) {
|
|
1266
1389
|
this.cache.add(surfaceForm, fsmParseList);
|