SinaTools 0.1.27__py2.py3-none-any.whl → 0.1.28__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {SinaTools-0.1.27.dist-info → SinaTools-0.1.28.dist-info}/METADATA +2 -2
- {SinaTools-0.1.27.dist-info → SinaTools-0.1.28.dist-info}/RECORD +27 -26
- sinatools/CLI/DataDownload/download_files.py +2 -5
- sinatools/CLI/morphology/ALMA_multi_word.py +0 -34
- sinatools/CLI/morphology/morph_analyzer.py +1 -1
- sinatools/CLI/ner/corpus_entity_extractor.py +17 -4
- sinatools/CLI/ner/entity_extractor.py +8 -8
- sinatools/CLI/utils/implication.py +3 -3
- sinatools/VERSION +1 -1
- sinatools/morphology/morph_analyzer.py +44 -45
- sinatools/ner/entity_extractor.py +41 -0
- sinatools/semantic_relatedness/compute_relatedness.py +22 -0
- sinatools/synonyms/synonyms_generator.py +45 -1
- sinatools/utils/jaccard.py +1 -1
- sinatools/utils/parser.py +12 -15
- sinatools/utils/similarity.py +95 -4
- sinatools/utils/text_dublication_detector.py +22 -0
- sinatools/utils/text_transliteration.py +1 -1
- sinatools/utils/tokenizer.py +1 -1
- sinatools/utils/word_compare.py +667 -0
- sinatools/wsd/disambiguator.py +20 -19
- {SinaTools-0.1.27.data → SinaTools-0.1.28.data}/data/sinatools/environment.yml +0 -0
- {SinaTools-0.1.27.dist-info → SinaTools-0.1.28.dist-info}/AUTHORS.rst +0 -0
- {SinaTools-0.1.27.dist-info → SinaTools-0.1.28.dist-info}/LICENSE +0 -0
- {SinaTools-0.1.27.dist-info → SinaTools-0.1.28.dist-info}/WHEEL +0 -0
- {SinaTools-0.1.27.dist-info → SinaTools-0.1.28.dist-info}/entry_points.txt +0 -0
- {SinaTools-0.1.27.dist-info → SinaTools-0.1.28.dist-info}/top_level.txt +0 -0
sinatools/wsd/disambiguator.py
CHANGED
@@ -457,13 +457,13 @@ def WSD(sentence):
|
|
457
457
|
|
458
458
|
def disambiguate(sentence):
|
459
459
|
"""
|
460
|
-
This method
|
460
|
+
This method is a pipeline of five methods. Given a sentence as input, it tags each word in the sentence with the following: lemma, single-word sense, multi-word sense, and NER tag. The disambiguation of single-word and multi-word senses is done using our ArabGlossBERT TSV model. You can try the demo online. For more details, read the article.
|
461
461
|
|
462
462
|
Args:
|
463
|
-
sentence (:obj:`str`): The Arabic text to be disambiguated
|
463
|
+
sentence (:obj:`str`): The Arabic text to be disambiguated.
|
464
464
|
|
465
465
|
Returns:
|
466
|
-
:obj:`list`:
|
466
|
+
:obj:`list`: A list of JSON objects in which each word has a concept id if one exists, or a lemma if no gloss is found.
|
467
467
|
|
468
468
|
**Example:**
|
469
469
|
|
@@ -475,22 +475,23 @@ def disambiguate(sentence):
|
|
475
475
|
print(result)
|
476
476
|
|
477
477
|
#output
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
478
|
+
[{
|
479
|
+
'concept_id': '303051631',
|
480
|
+
'word': 'تمشيت',
|
481
|
+
'lemma': 'تَمَشَّى'
|
482
|
+
},{
|
483
|
+
'concept_id': '303005470',
|
484
|
+
'word': 'بين',
|
485
|
+
'lemma': 'بَيْن'
|
486
|
+
},{
|
487
|
+
'concept_id': '303007335',
|
488
|
+
'word': 'الجداول',
|
489
|
+
'lemma': 'جَدْوَلٌ'
|
490
|
+
},{
|
491
|
+
'concept_id': '303056588',
|
492
|
+
'word': 'والأنهار',
|
493
|
+
'lemma': 'نَهْرٌ'
|
494
|
+
}]
|
494
495
|
"""
|
495
496
|
if len(sentence) > 500:
|
496
497
|
content = ["Input is too long"]
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|