SinaTools 0.1.19__tar.gz → 0.1.21__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {SinaTools-0.1.19 → SinaTools-0.1.21}/PKG-INFO +1 -1
- {SinaTools-0.1.19 → SinaTools-0.1.21}/SinaTools.egg-info/PKG-INFO +1 -1
- SinaTools-0.1.21/sinatools/VERSION +1 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/synonyms/__init__.py +5 -5
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/wsd/__init__.py +2 -2
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/wsd/disambiguator.py +36 -33
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/wsd/settings.py +0 -6
- SinaTools-0.1.19/sinatools/VERSION +0 -1
- {SinaTools-0.1.19 → SinaTools-0.1.21}/AUTHORS.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/CONTRIBUTING.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/LICENSE +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/MANIFEST.in +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/README.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/SinaTools.egg-info/SOURCES.txt +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/SinaTools.egg-info/dependency_links.txt +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/SinaTools.egg-info/entry_points.txt +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/SinaTools.egg-info/not-zip-safe +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/SinaTools.egg-info/requires.txt +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/SinaTools.egg-info/top_level.txt +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/Makefile +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/_images/download.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/_static/download.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/_static/file.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/_static/minus.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/_static/plus.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/html/_images/SinaLogo.jpg +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/html/_images/download.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/html/_static/SinaLogo.jpg +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/html/_static/download.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/html/_static/file.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/html/_static/minus.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/build/html/_static/plus.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/make.bat +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/License.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/Overview.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/_static/SinaLogo.jpg +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/_static/download.png +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/about.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/DataDownload/downloader.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/DataDownload.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/arabiner/bin/infer.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/arabiner.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/morphology/morph_analyzer.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/morphology.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/salma/views.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/salma.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/utils/corpus_tokenizer.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/utils/implication.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/utils/jaccard.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/utils/parser.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/utils/sentence_tokenizer.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/utils/text_transliteration.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api/utils.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/api.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/authors.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/DataDownload/download_files.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/DataDownload/get_appdatadir.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/DataDownload.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/arabiner/infer.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/arabiner.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/morphology/ALMA_multi_word.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/morphology/morph_analyzer.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/morphology.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/salma/salma_tools.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/salma.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/utils/arStrip.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/utils/corpus_tokenizer.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/utils/implication.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/utils/jaccard.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/utils/latin_remove.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/utils/remove_punc.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/utils/sentence_tokenizer.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/utils/text_transliteration.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools/utils.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/cli_tools.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/conf.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/index.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/installation.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/docs/source/readme.rst +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/setup.cfg +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/setup.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/DataDownload/download_files.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/morphology/ALMA_multi_word.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/morphology/morph_analyzer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/ner/corpus_entity_extractor.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/ner/entity_extractor.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/utils/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/utils/arStrip.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/utils/corpus_tokenizer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/utils/implication.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/utils/jaccard.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/utils/remove_latin.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/utils/remove_punctuation.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/utils/sentence_tokenizer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/utils/text_dublication_detector.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/CLI/utils/text_transliteration.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/DataDownload/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/DataDownload/downloader.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/create_classification_data.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/create_pretraining_data.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/extract_features.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/lamb_optimizer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/modeling.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/optimization.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/run_classifier.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/run_pretraining.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/run_squad.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/tokenization.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/build_openwebtext_pretraining_dataset.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/build_pretraining_dataset.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/build_pretraining_dataset_single_file.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/configure_finetuning.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/configure_pretraining.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/finetune/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/finetune/feature_spec.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/finetune/preprocessing.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/finetune/scorer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/finetune/task.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/finetune/task_builder.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/flops_computation.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/model/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/model/modeling.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/model/optimization.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/model/tokenization.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/pretrain/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/pretrain/pretrain_data.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/pretrain/pretrain_helpers.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/run_finetuning.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/run_pretraining.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/util/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/util/training_utils.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/util/utils.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/create_pretraining_data.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/gpt2/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/gpt2/lamb_optimizer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/gpt2/optimization.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/gpt2/run_pretraining.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/grover/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/grover/dataloader.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/grover/modeling.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/grover/modeling_gpt2.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/grover/optimization_adafactor.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/grover/train_tpu.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/grover/utils.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/train_bpe_tokenizer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/preprocess.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/environment.yml +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/install_env.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/morphology/ALMA_multi_word.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/morphology/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/morphology/morph_analyzer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/data/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/data/datasets.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/data/transforms.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/data_format.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/datasets.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/entity_extractor.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/helpers.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/metrics.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/nn/BaseModel.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/nn/BertNestedTagger.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/nn/BertSeqTagger.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/nn/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/trainers/BaseTrainer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/trainers/BertNestedTrainer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/trainers/BertTrainer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/trainers/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/ner/transforms.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/semantic_relatedness/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/semantic_relatedness/compute_relatedness.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/sinatools.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/synonyms/synonyms_generator.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/utils/__init__.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/utils/charsets.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/utils/implication.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/utils/jaccard.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/utils/parser.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/utils/readfile.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/utils/text_dublication_detector.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/utils/text_transliteration.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/utils/tokenizer.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/utils/tokenizers_words.py +0 -0
- {SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/wsd/wsd.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: SinaTools
|
3
|
-
Version: 0.1.19
|
3
|
+
Version: 0.1.21
|
4
4
|
Summary: Open-source Python toolkit for Arabic Natural Understanding, allowing people to integrate it in their system workflow.
|
5
5
|
Home-page: https://github.com/SinaLab/sinatools
|
6
6
|
License: MIT license
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: SinaTools
|
3
|
-
Version: 0.1.19
|
3
|
+
Version: 0.1.21
|
4
4
|
Summary: Open-source Python toolkit for Arabic Natural Understanding, allowing people to integrate it in their system workflow.
|
5
5
|
Home-page: https://github.com/SinaLab/sinatools
|
6
6
|
License: MIT license
|
@@ -0,0 +1 @@
|
|
1
|
+
0.1.21
|
@@ -11,8 +11,8 @@ with open(file_path, 'rb') as f:
|
|
11
11
|
|
12
12
|
|
13
13
|
synonyms_level3_dict = {}
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
14
|
+
level3_dict = 'synonyms_level3.pkl'
|
15
|
+
path = downloader.get_appdatadir()
|
16
|
+
file_path = os.path.join(path, level3_dict)
|
17
|
+
with open(file_path, 'rb') as f:
|
18
|
+
synonyms_level3_dict = pickle.load(f, encoding='utf-8')
|
@@ -3,9 +3,9 @@ import pickle
|
|
3
3
|
from sinatools.DataDownload import downloader
|
4
4
|
import os
|
5
5
|
|
6
|
-
|
6
|
+
glosses_dic = {}
|
7
7
|
filename = 'glosses_dic.pickle'
|
8
8
|
path =downloader.get_appdatadir()
|
9
9
|
file_path = os.path.join(path, filename)
|
10
10
|
with open(file_path, 'rb') as f:
|
11
|
-
|
11
|
+
glosses_dic = pickle.load(f)
|
@@ -7,6 +7,7 @@ from sinatools.utils.tokenizers_words import simple_word_tokenize
|
|
7
7
|
from sinatools.morphology.ALMA_multi_word import ALMA_multi_word
|
8
8
|
from sinatools.morphology.morph_analyzer import analyze
|
9
9
|
from sinatools.ner.entity_extractor import extract
|
10
|
+
from . import glosses_dic
|
10
11
|
|
11
12
|
|
12
13
|
def distill_entities(entities):
|
@@ -135,12 +136,9 @@ def find_two_word_lemma(input_sentence):
|
|
135
136
|
glosses_list = []
|
136
137
|
concept_count = 0
|
137
138
|
ids = data[0]["ids"]
|
138
|
-
for
|
139
|
-
|
140
|
-
|
141
|
-
glosses_list.append(json.loads(value[1]))
|
142
|
-
concept_count = concept_count + value[0]
|
143
|
-
|
139
|
+
for concepts in ids:
|
140
|
+
glosses_list.append(json.loads(concepts))
|
141
|
+
concept_count = concept_count + data[0]["POS"]
|
144
142
|
found_2Word_lemma = [two_grams, glosses_list, i, i + 1, concept_count, data[0]['undiac_multi_word_lemma'], data[0]['multi_word_lemma']]
|
145
143
|
output.append(found_2Word_lemma)
|
146
144
|
i = i + 1
|
@@ -160,12 +158,9 @@ def find_three_word_lemma(input_sentence):
|
|
160
158
|
glosses_list = []
|
161
159
|
concept_count = 0
|
162
160
|
ids = data[0]["ids"]
|
163
|
-
for
|
164
|
-
|
165
|
-
|
166
|
-
glosses_list.append(json.loads(value[1]))
|
167
|
-
concept_count = concept_count + value[0]
|
168
|
-
|
161
|
+
for concepts in ids:
|
162
|
+
glosses_list.append(json.loads(concepts))
|
163
|
+
concept_count = concept_count + data[0]["POS"]
|
169
164
|
found_3Word_lemma = [three_grams, glosses_list, i, i + 2, concept_count, data[0]['undiac_multi_word_lemma'], data[0]['multi_word_lemma']]
|
170
165
|
output.append(found_3Word_lemma)
|
171
166
|
i = i + 1
|
@@ -184,11 +179,9 @@ def find_four_word_lemma(input_sentence):
|
|
184
179
|
glosses_list = []
|
185
180
|
concept_count = 0
|
186
181
|
ids = data[0]["ids"]
|
187
|
-
for
|
188
|
-
|
189
|
-
|
190
|
-
glosses_list.append(json.loads(value[1]))
|
191
|
-
concept_count = concept_count + value[0]
|
182
|
+
for concepts in ids:
|
183
|
+
glosses_list.append(json.loads(concepts))
|
184
|
+
concept_count = concept_count + data[0]["POS"]
|
192
185
|
found_4Word_lemma = [four_grams, glosses_list, i, i + 3, concept_count, data[0]['undiac_multi_word_lemma'], data[0]['multi_word_lemma']]
|
193
186
|
output.append(found_4Word_lemma)
|
194
187
|
i = i + 1
|
@@ -208,11 +201,9 @@ def find_five_word_lemma(input_sentence):
|
|
208
201
|
glosses_list = []
|
209
202
|
concept_count = 0
|
210
203
|
ids = data[0]["ids"]
|
211
|
-
for
|
212
|
-
|
213
|
-
|
214
|
-
glosses_list.append(json.loads(value[1]))
|
215
|
-
concept_count = concept_count + value[0]
|
204
|
+
for concepts in ids:
|
205
|
+
glosses_list.append(json.loads(concepts))
|
206
|
+
concept_count = concept_count + data[0]["POS"]
|
216
207
|
found_5Word_lemma = [five_grams, glosses_list, i, i + 4, concept_count, data[0]['undiac_multi_word_lemma'], data[0]['multi_word_lemma']]
|
217
208
|
output.append(found_5Word_lemma)
|
218
209
|
i = i + 1
|
@@ -276,16 +267,18 @@ def find_glosses_using_ALMA(word):
|
|
276
267
|
pos = data[0]["pos"]
|
277
268
|
Undiac_lemma = arStrip(Diac_lemma, True, True, True, True, True, False) # Remove diacs , smallDiacs , shaddah , digit , alif , specialChars
|
278
269
|
ids = []
|
279
|
-
|
270
|
+
# glosses_list = []
|
280
271
|
concept_count = 0
|
281
272
|
lemma_id = data[0]["lemma_id"]
|
282
|
-
if lemma_id in settings.glosses_dic.keys():
|
283
|
-
value = settings.glosses_dic[lemma_id]
|
284
|
-
glosses_list.append(json.loads(value[1]))
|
285
|
-
concept_count = concept_count + value[0]
|
286
273
|
|
287
|
-
|
274
|
+
if lemma_id in glosses_dic.keys():
|
275
|
+
value = glosses_dic[lemma_id]
|
276
|
+
glosses= json.loads(value[1])
|
277
|
+
# glosses_list.append(json.loads(value[1]))
|
278
|
+
concept_count = concept_count + value[0]
|
288
279
|
|
280
|
+
return word, Undiac_lemma, Diac_lemma, pos , concept_count, glosses
|
281
|
+
|
289
282
|
def disambiguate_glosses_using_SALMA(glosses, Diac_lemma, Undiac_lemma, word, sentence):
|
290
283
|
word = normalizearabert(word)
|
291
284
|
glosses_dictionary = {}
|
@@ -309,7 +302,7 @@ def disambiguate_glosses_using_SALMA(glosses, Diac_lemma, Undiac_lemma, word, se
|
|
309
302
|
return my_json
|
310
303
|
|
311
304
|
|
312
|
-
def find_glosses(input_sentence,
|
305
|
+
def find_glosses(input_sentence, two_word_lemma, three_word_lemma,four_word_lemma, five_word_lemma, ner):
|
313
306
|
output_list = []
|
314
307
|
position = 0
|
315
308
|
while position < len(input_sentence):
|
@@ -376,7 +369,7 @@ def find_glosses(input_sentence, three_word_lemma, two_word_lemma, four_word_lem
|
|
376
369
|
position = position + 1
|
377
370
|
|
378
371
|
|
379
|
-
|
372
|
+
|
380
373
|
output_from_ner = delete_form_list(position, ner)
|
381
374
|
ner = output_from_ner[0]
|
382
375
|
if output_from_ner[1] != []:
|
@@ -385,11 +378,13 @@ def find_glosses(input_sentence, three_word_lemma, two_word_lemma, four_word_lem
|
|
385
378
|
my_json = {}
|
386
379
|
word = output_from_ner[1][0][0]
|
387
380
|
my_json['word'] = word
|
388
|
-
my_json['concept_count'] = output_from_ner[1][0][2]
|
381
|
+
# my_json['concept_count'] = output_from_ner[1][0][2]
|
382
|
+
my_json['concept_count'] = '*'
|
389
383
|
my_json['glosses'] = output_from_ner[1][0][1]
|
390
384
|
my_json['Diac_lemma'] = output_from_ner[1][0][4]
|
391
385
|
my_json['Undiac_lemma'] = output_from_ner[1][0][3]
|
392
386
|
output_list.append(my_json)
|
387
|
+
# print("output list: ", output_list)
|
393
388
|
position = position + 1
|
394
389
|
|
395
390
|
if flag == "False": # Not found in ner or in multi_word_dictionary, ASK ALMA
|
@@ -417,6 +412,15 @@ def disambiguate_glosses_main(word, sentence):
|
|
417
412
|
my_json = {}
|
418
413
|
my_json['word'] = word['word']
|
419
414
|
glosses = word['glosses'][0]
|
415
|
+
# my_json['Gloss'] = glosses['gloss']
|
416
|
+
my_json['Concept_id'] = glosses['concept_id']
|
417
|
+
my_json['Diac_lemma'] = word['Diac_lemma']
|
418
|
+
my_json['Undiac_lemma'] = word['Undiac_lemma']
|
419
|
+
return my_json
|
420
|
+
elif concept_count == '*':
|
421
|
+
my_json = {}
|
422
|
+
my_json['word'] = word['word']
|
423
|
+
glosses = word['glosses'][0]
|
420
424
|
my_json['Gloss'] = glosses['gloss']
|
421
425
|
my_json['Concept_id'] = glosses['concept_id']
|
422
426
|
my_json['Diac_lemma'] = word['Diac_lemma']
|
@@ -444,8 +448,7 @@ def WSD(sentence):
|
|
444
448
|
|
445
449
|
ner = find_named_entities(" ".join(input_sentence))
|
446
450
|
|
447
|
-
output_list = find_glosses(input_sentence,
|
448
|
-
|
451
|
+
output_list = find_glosses(input_sentence, two_word_lemma, three_word_lemma, four_word_lemma, five_word_lemma, ner)
|
449
452
|
results = []
|
450
453
|
for word in output_list:
|
451
454
|
results.append(disambiguate_glosses_main(word, sentence))
|
@@ -9,7 +9,6 @@ import pandas as pd
|
|
9
9
|
from sinatools.DataDownload import downloader
|
10
10
|
import os
|
11
11
|
|
12
|
-
glosses_dic = {}
|
13
12
|
|
14
13
|
model_file_name = "bert-base-arabertv02_22_May_2021_00h_allglosses_unused01"
|
15
14
|
path =downloader.get_appdatadir()
|
@@ -21,11 +20,6 @@ tokenizer_file_path = os.path.join(path, tokenizer_file_name)
|
|
21
20
|
|
22
21
|
dftrue = pd.DataFrame()
|
23
22
|
|
24
|
-
# model = BertForSequenceClassification.from_pretrained('{}'.format("bert-base-arabertv02_22_May_2021_00h_allglosses_unused01"),
|
25
|
-
# output_hidden_states = True,
|
26
|
-
# num_labels=2
|
27
|
-
# )
|
28
|
-
|
29
23
|
model = BertForSequenceClassification.from_pretrained(model_file_path, output_hidden_states=True, num_labels=2)
|
30
24
|
|
31
25
|
tokenizer = BertTokenizer.from_pretrained('{}'.format(tokenizer_file_path))
|
@@ -1 +0,0 @@
|
|
1
|
-
0.1.19
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/arabert/create_classification_data.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/build_pretraining_dataset.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/finetune/preprocessing.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/pretrain/pretrain_data.py
RENAMED
File without changes
|
{SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/araelectra/pretrain/pretrain_helpers.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{SinaTools-0.1.19 → SinaTools-0.1.21}/sinatools/arabert/aragpt2/grover/optimization_adafactor.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|