SinaTools 0.1.29__py2.py3-none-any.whl → 0.1.30__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {SinaTools-0.1.29.dist-info → SinaTools-0.1.30.dist-info}/METADATA +1 -1
- {SinaTools-0.1.29.dist-info → SinaTools-0.1.30.dist-info}/RECORD +12 -12
- sinatools/DataDownload/downloader.py +4 -3
- sinatools/VERSION +1 -1
- sinatools/morphology/morph_analyzer.py +7 -3
- sinatools/ner/__init__.py +1 -2
- {SinaTools-0.1.29.data → SinaTools-0.1.30.data}/data/sinatools/environment.yml +0 -0
- {SinaTools-0.1.29.dist-info → SinaTools-0.1.30.dist-info}/AUTHORS.rst +0 -0
- {SinaTools-0.1.29.dist-info → SinaTools-0.1.30.dist-info}/LICENSE +0 -0
- {SinaTools-0.1.29.dist-info → SinaTools-0.1.30.dist-info}/WHEEL +0 -0
- {SinaTools-0.1.29.dist-info → SinaTools-0.1.30.dist-info}/entry_points.txt +0 -0
- {SinaTools-0.1.29.dist-info → SinaTools-0.1.30.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: SinaTools
|
3
|
-
Version: 0.1.29
|
3
|
+
Version: 0.1.30
|
4
4
|
Summary: Open-source Python toolkit for Arabic Natural Understanding, allowing people to integrate it in their system workflow.
|
5
5
|
Home-page: https://github.com/SinaLab/sinatools
|
6
6
|
License: MIT license
|
@@ -1,5 +1,5 @@
|
|
1
|
-
SinaTools-0.1.29.data/data/sinatools/environment.yml,sha256=OzilhLjZbo_3nU93EQNUFX-6G5O3newiSWrwxvMH2Os,7231
|
2
|
-
sinatools/VERSION,sha256=
|
1
|
+
SinaTools-0.1.30.data/data/sinatools/environment.yml,sha256=OzilhLjZbo_3nU93EQNUFX-6G5O3newiSWrwxvMH2Os,7231
|
2
|
+
sinatools/VERSION,sha256=4IPaHhHWuxBZjZ0tYxwy4rdWjvuIZronPKqN26wZ7eE,6
|
3
3
|
sinatools/__init__.py,sha256=bEosTU1o-FSpyytS6iVP_82BXHF2yHnzpJxPLYRbeII,135
|
4
4
|
sinatools/environment.yml,sha256=OzilhLjZbo_3nU93EQNUFX-6G5O3newiSWrwxvMH2Os,7231
|
5
5
|
sinatools/install_env.py,sha256=EODeeE0ZzfM_rz33_JSIruX03Nc4ghyVOM5BHVhsZaQ,404
|
@@ -20,7 +20,7 @@ sinatools/CLI/utils/sentence_tokenizer.py,sha256=Wli8eiDbWSd_Z8UKpu_JkaS8jImowa1
|
|
20
20
|
sinatools/CLI/utils/text_dublication_detector.py,sha256=dW70O5O20GxeUDDF6zVYn52wWLmJF-HBZgvqIeVL2rQ,1661
|
21
21
|
sinatools/CLI/utils/text_transliteration.py,sha256=vz-3kxWf8pNYVCqNAtBAiA6u_efrS5NtWT-ofN1NX6I,2014
|
22
22
|
sinatools/DataDownload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
-
sinatools/DataDownload/downloader.py,sha256=
|
23
|
+
sinatools/DataDownload/downloader.py,sha256=3UkRRH4TLbut10V1BgWO3EqJQaHVBqr6pAj7Fn4AQZ8,6511
|
24
24
|
sinatools/arabert/__init__.py,sha256=ely2PttjgSv7vKdzskuD1rtK_l_UOpmxJSz8isrveD0,16
|
25
25
|
sinatools/arabert/preprocess.py,sha256=qI0FsuMTOzdRlYGCtLrjpXgikNElUZPv9bnjaKDZKJ4,33024
|
26
26
|
sinatools/arabert/arabert/__init__.py,sha256=KbSAH-XqbRygn0y59m5-ZYOLXgpT1gSgE3F-qd4rKEc,627
|
@@ -75,8 +75,8 @@ sinatools/arabert/aragpt2/grover/train_tpu.py,sha256=qNgLI_j6-KYkTMJfVoFlh4NIKwe
|
|
75
75
|
sinatools/arabert/aragpt2/grover/utils.py,sha256=V5wMUxK03r5g_pb7R3_uGLOPqQJfbIB0VaJ8ZDM4XAo,8473
|
76
76
|
sinatools/morphology/ALMA_multi_word.py,sha256=hj_-8ojrYYHnfCGk8WKtJdUR8mauzQdma4WUm-okDps,1346
|
77
77
|
sinatools/morphology/__init__.py,sha256=I4wVBh8BhyNl-CySVdiI_nUSn6gj1j-gmLKP300RpE0,1216
|
78
|
-
sinatools/morphology/morph_analyzer.py,sha256=
|
79
|
-
sinatools/ner/__init__.py,sha256=
|
78
|
+
sinatools/morphology/morph_analyzer.py,sha256=XrLkFqI89GmQuRyZB5X7GNIpfedfGNnQwHzrz5bDu5A,7190
|
79
|
+
sinatools/ner/__init__.py,sha256=isVSWoFZNiWpDCiT4hNKY5C2eVupN2SvCqYbie8oN2k,1289
|
80
80
|
sinatools/ner/data.py,sha256=lvOW86dXse8SC75Q0supQaE0rrRffoxNjIA0Qbv5WZY,4354
|
81
81
|
sinatools/ner/data_format.py,sha256=7Yt0aOicOn9_YuuyCkM_IYi_rgjGYxR9bCuUaNGM73o,4341
|
82
82
|
sinatools/ner/datasets.py,sha256=mG1iwqSm3lXCFHLqE-b4wNi176cpuzNBz8tKaBU6z6M,5059
|
@@ -116,10 +116,10 @@ sinatools/wsd/__init__.py,sha256=mwmCUurOV42rsNRpIUP3luG0oEzeTfEx3oeDl93Oif8,306
|
|
116
116
|
sinatools/wsd/disambiguator.py,sha256=h-3idc5rPPbMDSE_QVJAsEVkDHwzYY3L2SEPNXIdOcc,20104
|
117
117
|
sinatools/wsd/settings.py,sha256=6XflVTFKD8SVySX9Wj7zYQtV26WDTcQ2-uW8-gDNHKE,747
|
118
118
|
sinatools/wsd/wsd.py,sha256=gHIBUFXegoY1z3rRnIlK6TduhYq2BTa_dHakOjOlT4k,4434
|
119
|
-
SinaTools-0.1.29.dist-info/AUTHORS.rst,sha256=aTWeWlIdfLi56iLJfIUAwIrmqDcgxXKLji75_Fjzjyg,174
|
120
|
-
SinaTools-0.1.29.dist-info/LICENSE,sha256=uwsKYG4TayHXNANWdpfMN2lVW4dimxQjA_7vuCVhD70,1088
|
121
|
-
SinaTools-0.1.
|
122
|
-
SinaTools-0.1.29.dist-info/WHEEL,sha256=6T3TYZE4YFi2HTS1BeZHNXAi8N52OZT4O-dJ6-ome_4,116
|
123
|
-
SinaTools-0.1.29.dist-info/entry_points.txt,sha256=ZwZLolnWog2fjdDrfaHNHob8SE_YtMbD6ayzsOzItxs,1234
|
124
|
-
SinaTools-0.1.29.dist-info/top_level.txt,sha256=8tNdPTeJKw3TQCaua8IJIx6N6WpgZZmVekf1OdBNJpE,10
|
125
|
-
SinaTools-0.1.29.dist-info/RECORD,,
|
119
|
+
SinaTools-0.1.30.dist-info/AUTHORS.rst,sha256=aTWeWlIdfLi56iLJfIUAwIrmqDcgxXKLji75_Fjzjyg,174
|
120
|
+
SinaTools-0.1.30.dist-info/LICENSE,sha256=uwsKYG4TayHXNANWdpfMN2lVW4dimxQjA_7vuCVhD70,1088
|
121
|
+
SinaTools-0.1.30.dist-info/METADATA,sha256=Pl7OjoUAbjqPtzJuGqvXeb7GVCx2t-7cxW4-APUKLIk,3267
|
122
|
+
SinaTools-0.1.30.dist-info/WHEEL,sha256=6T3TYZE4YFi2HTS1BeZHNXAi8N52OZT4O-dJ6-ome_4,116
|
123
|
+
SinaTools-0.1.30.dist-info/entry_points.txt,sha256=ZwZLolnWog2fjdDrfaHNHob8SE_YtMbD6ayzsOzItxs,1234
|
124
|
+
SinaTools-0.1.30.dist-info/top_level.txt,sha256=8tNdPTeJKw3TQCaua8IJIx6N6WpgZZmVekf1OdBNJpE,10
|
125
|
+
SinaTools-0.1.30.dist-info/RECORD,,
|
@@ -10,13 +10,14 @@ urls = {
|
|
10
10
|
'ner': 'https://sina.birzeit.edu/Wj27012000.tar.gz',
|
11
11
|
'wsd_model': 'https://sina.birzeit.edu/bert-base-arabertv02_22_May_2021_00h_allglosses_unused01.zip',
|
12
12
|
'wsd_tokenizer': 'https://sina.birzeit.edu/bert-base-arabertv02.zip',
|
13
|
-
'
|
13
|
+
'one_gram': 'https://sina.birzeit.edu/one_gram.pickle',
|
14
14
|
'five_grams': 'https://sina.birzeit.edu/five_grams.pickle',
|
15
15
|
'four_grams':'https://sina.birzeit.edu/four_grams.pickle',
|
16
16
|
'three_grams':'https://sina.birzeit.edu/three_grams.pickle',
|
17
17
|
'two_grams':'https://sina.birzeit.edu/two_grams.pickle',
|
18
|
-
'
|
19
|
-
'
|
18
|
+
'graph_l2':'https://sina.birzeit.edu/graph_l2.pkl',
|
19
|
+
'graph_l3':'https://sina.birzeit.edu/graph_l3.pkl',
|
20
|
+
'relation':'https://sina.birzeit.edu/relation_model.zip'
|
20
21
|
}
|
21
22
|
|
22
23
|
def get_appdatadir():
|
sinatools/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.29
|
1
|
+
0.1.30
|
@@ -3,6 +3,7 @@ from sinatools.utils.tokenizers_words import simple_word_tokenize
|
|
3
3
|
from sinatools.utils.parser import arStrip
|
4
4
|
from sinatools.utils.charsets import AR_CHARSET, AR_DIAC_CHARSET
|
5
5
|
from sinatools.DataDownload.downloader import get_appdatadir
|
6
|
+
from sinatools.morphology.morph_analyzer import remove_punctuation
|
6
7
|
from . import dictionary
|
7
8
|
|
8
9
|
_IS_AR_RE = re.compile(u'^[' + re.escape(u''.join(AR_CHARSET)) + u']+$')
|
@@ -98,13 +99,16 @@ def analyze(text, language ='MSA', task ='full', flag="1"):
|
|
98
99
|
token = arStrip(token , False , True , False , False , False , False)
|
99
100
|
token = re.sub('[ٱ]','ﺍ',token)
|
100
101
|
# token, freq, lemma, lemma_id, root, pos
|
101
|
-
solution = [token, 0, token
|
102
|
+
solution = [token, 0, token, 0, token, ""]
|
102
103
|
|
103
104
|
if token.isdigit():
|
104
|
-
solution[5] = "
|
105
|
+
solution[5] = "رقم" #pos
|
106
|
+
|
107
|
+
elif remove_punctuation(token).strip() == "":
|
108
|
+
solution[5] = "علامة ترقيم" #pos
|
105
109
|
|
106
110
|
elif not _is_ar(token):
|
107
|
-
solution[5] = "
|
111
|
+
solution[5] = "أجنبي" #pos
|
108
112
|
|
109
113
|
else:
|
110
114
|
result_token = find_solution(token,language,flag)
|
sinatools/ner/__init__.py
CHANGED
@@ -39,5 +39,4 @@ train_config.trainer_config["kwargs"]["model"] = model
|
|
39
39
|
tagger = load_object(train_config.trainer_config["fn"], train_config.trainer_config["kwargs"])
|
40
40
|
tagger.load(os.path.join(model_path,"checkpoints"))
|
41
41
|
|
42
|
-
pipe = pipeline("sentiment-analysis", model= os.path.join(path, "
|
43
|
-
#pipe = AutoModelForSequenceClassification.from_pretrained(os.path.join(path, "best_model"))
|
42
|
+
pipe = pipeline("sentiment-analysis", model= os.path.join(path, "relation_model"), return_all_scores =True, max_length=128, truncation=True)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|