SinaTools 0.1.21__tar.gz → 0.1.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {SinaTools-0.1.21 → SinaTools-0.1.22}/PKG-INFO +1 -1
- {SinaTools-0.1.21 → SinaTools-0.1.22}/SinaTools.egg-info/PKG-INFO +1 -1
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/DataDownload/download_files.py +2 -2
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/DataDownload/downloader.py +37 -40
- SinaTools-0.1.22/sinatools/VERSION +1 -0
- SinaTools-0.1.21/sinatools/VERSION +0 -1
- {SinaTools-0.1.21 → SinaTools-0.1.22}/AUTHORS.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/CONTRIBUTING.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/LICENSE +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/MANIFEST.in +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/README.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/SinaTools.egg-info/SOURCES.txt +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/SinaTools.egg-info/dependency_links.txt +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/SinaTools.egg-info/entry_points.txt +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/SinaTools.egg-info/not-zip-safe +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/SinaTools.egg-info/requires.txt +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/SinaTools.egg-info/top_level.txt +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/Makefile +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/_images/download.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/_static/download.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/_static/file.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/_static/minus.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/_static/plus.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/html/_images/SinaLogo.jpg +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/html/_images/download.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/html/_static/SinaLogo.jpg +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/html/_static/download.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/html/_static/file.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/html/_static/minus.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/build/html/_static/plus.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/make.bat +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/License.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/Overview.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/_static/SinaLogo.jpg +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/_static/download.png +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/about.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/DataDownload/downloader.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/DataDownload.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/arabiner/bin/infer.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/arabiner.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/morphology/morph_analyzer.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/morphology.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/salma/views.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/salma.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/utils/corpus_tokenizer.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/utils/implication.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/utils/jaccard.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/utils/parser.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/utils/sentence_tokenizer.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/utils/text_transliteration.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api/utils.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/api.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/authors.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/DataDownload/download_files.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/DataDownload/get_appdatadir.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/DataDownload.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/arabiner/infer.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/arabiner.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/morphology/ALMA_multi_word.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/morphology/morph_analyzer.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/morphology.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/salma/salma_tools.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/salma.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/utils/arStrip.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/utils/corpus_tokenizer.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/utils/implication.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/utils/jaccard.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/utils/latin_remove.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/utils/remove_punc.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/utils/sentence_tokenizer.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/utils/text_transliteration.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools/utils.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/cli_tools.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/conf.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/index.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/installation.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/docs/source/readme.rst +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/setup.cfg +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/setup.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/morphology/ALMA_multi_word.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/morphology/morph_analyzer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/ner/corpus_entity_extractor.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/ner/entity_extractor.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/utils/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/utils/arStrip.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/utils/corpus_tokenizer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/utils/implication.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/utils/jaccard.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/utils/remove_latin.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/utils/remove_punctuation.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/utils/sentence_tokenizer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/utils/text_dublication_detector.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/CLI/utils/text_transliteration.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/DataDownload/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/create_classification_data.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/create_pretraining_data.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/extract_features.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/lamb_optimizer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/modeling.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/optimization.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/run_classifier.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/run_pretraining.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/run_squad.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/tokenization.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/build_openwebtext_pretraining_dataset.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/build_pretraining_dataset.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/build_pretraining_dataset_single_file.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/configure_finetuning.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/configure_pretraining.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/finetune/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/finetune/feature_spec.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/finetune/preprocessing.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/finetune/scorer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/finetune/task.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/finetune/task_builder.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/flops_computation.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/model/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/model/modeling.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/model/optimization.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/model/tokenization.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/pretrain/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/pretrain/pretrain_data.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/pretrain/pretrain_helpers.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/run_finetuning.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/run_pretraining.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/util/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/util/training_utils.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/util/utils.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/create_pretraining_data.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/gpt2/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/gpt2/lamb_optimizer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/gpt2/optimization.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/gpt2/run_pretraining.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/grover/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/grover/dataloader.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/grover/modeling.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/grover/modeling_gpt2.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/grover/optimization_adafactor.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/grover/train_tpu.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/grover/utils.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/train_bpe_tokenizer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/preprocess.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/environment.yml +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/install_env.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/morphology/ALMA_multi_word.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/morphology/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/morphology/morph_analyzer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/data/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/data/datasets.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/data/transforms.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/data_format.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/datasets.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/entity_extractor.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/helpers.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/metrics.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/nn/BaseModel.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/nn/BertNestedTagger.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/nn/BertSeqTagger.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/nn/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/trainers/BaseTrainer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/trainers/BertNestedTrainer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/trainers/BertTrainer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/trainers/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/ner/transforms.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/semantic_relatedness/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/semantic_relatedness/compute_relatedness.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/sinatools.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/synonyms/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/synonyms/synonyms_generator.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/utils/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/utils/charsets.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/utils/implication.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/utils/jaccard.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/utils/parser.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/utils/readfile.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/utils/text_dublication_detector.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/utils/text_transliteration.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/utils/tokenizer.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/utils/tokenizers_words.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/wsd/__init__.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/wsd/disambiguator.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/wsd/settings.py +0 -0
- {SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/wsd/wsd.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: SinaTools
|
3
|
-
Version: 0.1.21
|
3
|
+
Version: 0.1.22
|
4
4
|
Summary: Open-source Python toolkit for Arabic Natural Understanding, allowing people to integrate it in their system workflow.
|
5
5
|
Home-page: https://github.com/SinaLab/sinatools
|
6
6
|
License: MIT license
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: SinaTools
|
3
|
-
Version: 0.1.21
|
3
|
+
Version: 0.1.22
|
4
4
|
Summary: Open-source Python toolkit for Arabic Natural Understanding, allowing people to integrate it in their system workflow.
|
5
5
|
Home-page: https://github.com/SinaLab/sinatools
|
6
6
|
License: MIT license
|
@@ -2,7 +2,7 @@
|
|
2
2
|
About:
|
3
3
|
------
|
4
4
|
|
5
|
-
The download_files
|
5
|
+
The download_files is a command-line interface for downloading various NLP resources from pre-specified URLs. It is a part of the sinatools package and provides options to choose which files to download and to specify a download directory. The tool automatically handles file extraction for zip and tar.gz files.
|
6
6
|
|
7
7
|
Usage:
|
8
8
|
------
|
@@ -18,7 +18,7 @@ Below is the usage information that can be generated by running download_files -
|
|
18
18
|
|
19
19
|
Options:
|
20
20
|
-f, --files FILES
|
21
|
-
Names of the files to download. Available files are: ner, morph,
|
21
|
+
Names of the files to download. Available files are: ner, morph, wsd_model, wsd_tokenizer, glosses_dic, five_grams, four_grams, three_grams, two_grams, synonyms_level2, synonyms_level3.
|
22
22
|
If no file is specified, all files will be downloaded.
|
23
23
|
|
24
24
|
Examples:
|
@@ -6,16 +6,17 @@ import zipfile
|
|
6
6
|
from tqdm import tqdm
|
7
7
|
import tarfile
|
8
8
|
urls = {
|
9
|
-
'morph': 'https://portal.sina.birzeit.edu/
|
9
|
+
'morph': 'https://portal.sina.birzeit.edu/lemmas_dic.pickle',
|
10
10
|
'ner': 'https://portal.sina.birzeit.edu/Wj27012000.tar.gz',
|
11
|
-
'
|
12
|
-
'
|
11
|
+
'wsd_model': 'https://portal.sina.birzeit.edu/bert-base-arabertv02_22_May_2021_00h_allglosses_unused01.zip',
|
12
|
+
'wsd_tokenizer': 'https://portal.sina.birzeit.edu/bert-base-arabertv02.zip',
|
13
13
|
'glosses_dic': 'https://portal.sina.birzeit.edu/glosses_dic.pickle',
|
14
|
-
'lemma_dic': 'https://portal.sina.birzeit.edu/lemmas_dic.pickle',
|
15
14
|
'five_grams': 'https://portal.sina.birzeit.edu/five_grams.pickle',
|
16
15
|
'four_grams':'https://portal.sina.birzeit.edu/four_grams.pickle',
|
17
16
|
'three_grams':'https://portal.sina.birzeit.edu/three_grams.pickle',
|
18
|
-
'two_grams':'https://portal.sina.birzeit.edu/two_grams.pickle'
|
17
|
+
'two_grams':'https://portal.sina.birzeit.edu/two_grams.pickle',
|
18
|
+
'synonyms_level2':'https://portal.sina.birzeit.edu/synonyms_level2.pkl',
|
19
|
+
'synonyms_level3':'https://portal.sina.birzeit.edu/synonyms_level3.pkl'
|
19
20
|
}
|
20
21
|
|
21
22
|
def get_appdatadir():
|
@@ -94,41 +95,37 @@ def download_file(url, dest_path=get_appdatadir()):
|
|
94
95
|
print(filename)
|
95
96
|
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
|
96
97
|
|
97
|
-
try:
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
if e.response.status_code == 403:
|
129
|
-
print(f'Error 403: Forbidden. The requested file URL {url} could not be downloaded due to insufficient permissions. Please check the URL and try again.')
|
130
|
-
else:
|
131
|
-
print('An error occurred while downloading the file:', e)
|
98
|
+
# try:
|
99
|
+
with requests.get(url, headers=headers, stream=True) as r:
|
100
|
+
r.raise_for_status()
|
101
|
+
with open(file_path, 'wb') as f:
|
102
|
+
total_size = int(r.headers.get('content-length', 0))
|
103
|
+
block_size = 8192
|
104
|
+
progress_bar = tqdm(total=total_size, unit='iB', unit_scale=True)
|
105
|
+
for chunk in r.iter_content(chunk_size=block_size):
|
106
|
+
if chunk:
|
107
|
+
f.write(chunk)
|
108
|
+
progress_bar.update(len(chunk))
|
109
|
+
progress_bar.close()
|
110
|
+
# Check the file type and extract accordingly
|
111
|
+
file_extension = os.path.splitext(file_path)[1]
|
112
|
+
extracted_folder_name = os.path.splitext(file_path)[0]
|
113
|
+
|
114
|
+
if file_extension == '.zip':
|
115
|
+
extract_zip(file_path, extracted_folder_name)
|
116
|
+
elif file_extension == '.gz':
|
117
|
+
extract_tar(file_path, extracted_folder_name)
|
118
|
+
elif file_extension =='.pickle':
|
119
|
+
print(f'Done: {file_extension}')
|
120
|
+
else:
|
121
|
+
print(f'Unsupported file type for extraction: {file_extension}')
|
122
|
+
return file_path
|
123
|
+
|
124
|
+
# except requests.exceptions.HTTPError as e:
|
125
|
+
# if e.response.status_code == 403:
|
126
|
+
# print(f'Error 403: Forbidden. The requested file URL {url} could not be downloaded due to insufficient permissions. Please check the URL and try again.')
|
127
|
+
# else:
|
128
|
+
# print('An error occurred while downloading the file:', e)
|
132
129
|
|
133
130
|
def extract_zip(file_path, extracted_folder_name):
|
134
131
|
"""
|
@@ -0,0 +1 @@
|
|
1
|
+
0.1.22
|
@@ -1 +0,0 @@
|
|
1
|
-
0.1.21
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/arabert/create_classification_data.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/build_pretraining_dataset.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/finetune/preprocessing.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/pretrain/pretrain_data.py
RENAMED
File without changes
|
{SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/araelectra/pretrain/pretrain_helpers.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{SinaTools-0.1.21 → SinaTools-0.1.22}/sinatools/arabert/aragpt2/grover/optimization_adafactor.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|