SinaTools 0.1.29__py2.py3-none-any.whl → 0.1.30__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: SinaTools
3
- Version: 0.1.29
3
+ Version: 0.1.30
4
4
  Summary: Open-source Python toolkit for Arabic Natural Understanding, allowing people to integrate it in their system workflow.
5
5
  Home-page: https://github.com/SinaLab/sinatools
6
6
  License: MIT license
@@ -1,5 +1,5 @@
1
- SinaTools-0.1.29.data/data/sinatools/environment.yml,sha256=OzilhLjZbo_3nU93EQNUFX-6G5O3newiSWrwxvMH2Os,7231
2
- sinatools/VERSION,sha256=3gAvtibHsL3Zih60tzJshU5QcbL40f0qBmAAXPGrB-Q,6
1
+ SinaTools-0.1.30.data/data/sinatools/environment.yml,sha256=OzilhLjZbo_3nU93EQNUFX-6G5O3newiSWrwxvMH2Os,7231
2
+ sinatools/VERSION,sha256=4IPaHhHWuxBZjZ0tYxwy4rdWjvuIZronPKqN26wZ7eE,6
3
3
  sinatools/__init__.py,sha256=bEosTU1o-FSpyytS6iVP_82BXHF2yHnzpJxPLYRbeII,135
4
4
  sinatools/environment.yml,sha256=OzilhLjZbo_3nU93EQNUFX-6G5O3newiSWrwxvMH2Os,7231
5
5
  sinatools/install_env.py,sha256=EODeeE0ZzfM_rz33_JSIruX03Nc4ghyVOM5BHVhsZaQ,404
@@ -20,7 +20,7 @@ sinatools/CLI/utils/sentence_tokenizer.py,sha256=Wli8eiDbWSd_Z8UKpu_JkaS8jImowa1
20
20
  sinatools/CLI/utils/text_dublication_detector.py,sha256=dW70O5O20GxeUDDF6zVYn52wWLmJF-HBZgvqIeVL2rQ,1661
21
21
  sinatools/CLI/utils/text_transliteration.py,sha256=vz-3kxWf8pNYVCqNAtBAiA6u_efrS5NtWT-ofN1NX6I,2014
22
22
  sinatools/DataDownload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- sinatools/DataDownload/downloader.py,sha256=6xH55WlDhgtImPRFQ0AaeDFJjL8OMNU29x61PL8mZ2w,6468
23
+ sinatools/DataDownload/downloader.py,sha256=3UkRRH4TLbut10V1BgWO3EqJQaHVBqr6pAj7Fn4AQZ8,6511
24
24
  sinatools/arabert/__init__.py,sha256=ely2PttjgSv7vKdzskuD1rtK_l_UOpmxJSz8isrveD0,16
25
25
  sinatools/arabert/preprocess.py,sha256=qI0FsuMTOzdRlYGCtLrjpXgikNElUZPv9bnjaKDZKJ4,33024
26
26
  sinatools/arabert/arabert/__init__.py,sha256=KbSAH-XqbRygn0y59m5-ZYOLXgpT1gSgE3F-qd4rKEc,627
@@ -75,8 +75,8 @@ sinatools/arabert/aragpt2/grover/train_tpu.py,sha256=qNgLI_j6-KYkTMJfVoFlh4NIKwe
75
75
  sinatools/arabert/aragpt2/grover/utils.py,sha256=V5wMUxK03r5g_pb7R3_uGLOPqQJfbIB0VaJ8ZDM4XAo,8473
76
76
  sinatools/morphology/ALMA_multi_word.py,sha256=hj_-8ojrYYHnfCGk8WKtJdUR8mauzQdma4WUm-okDps,1346
77
77
  sinatools/morphology/__init__.py,sha256=I4wVBh8BhyNl-CySVdiI_nUSn6gj1j-gmLKP300RpE0,1216
78
- sinatools/morphology/morph_analyzer.py,sha256=3B-ewxFg_If83oYlk1bDdVS1clb-mgyAF4WgAMqcAVI,7009
79
- sinatools/ner/__init__.py,sha256=CLPaqUcvPGAA4lU-6hjAqjNfKJ5WtwRfsma6QkYZHEk,1379
78
+ sinatools/morphology/morph_analyzer.py,sha256=XrLkFqI89GmQuRyZB5X7GNIpfedfGNnQwHzrz5bDu5A,7190
79
+ sinatools/ner/__init__.py,sha256=isVSWoFZNiWpDCiT4hNKY5C2eVupN2SvCqYbie8oN2k,1289
80
80
  sinatools/ner/data.py,sha256=lvOW86dXse8SC75Q0supQaE0rrRffoxNjIA0Qbv5WZY,4354
81
81
  sinatools/ner/data_format.py,sha256=7Yt0aOicOn9_YuuyCkM_IYi_rgjGYxR9bCuUaNGM73o,4341
82
82
  sinatools/ner/datasets.py,sha256=mG1iwqSm3lXCFHLqE-b4wNi176cpuzNBz8tKaBU6z6M,5059
@@ -116,10 +116,10 @@ sinatools/wsd/__init__.py,sha256=mwmCUurOV42rsNRpIUP3luG0oEzeTfEx3oeDl93Oif8,306
116
116
  sinatools/wsd/disambiguator.py,sha256=h-3idc5rPPbMDSE_QVJAsEVkDHwzYY3L2SEPNXIdOcc,20104
117
117
  sinatools/wsd/settings.py,sha256=6XflVTFKD8SVySX9Wj7zYQtV26WDTcQ2-uW8-gDNHKE,747
118
118
  sinatools/wsd/wsd.py,sha256=gHIBUFXegoY1z3rRnIlK6TduhYq2BTa_dHakOjOlT4k,4434
119
- SinaTools-0.1.29.dist-info/AUTHORS.rst,sha256=aTWeWlIdfLi56iLJfIUAwIrmqDcgxXKLji75_Fjzjyg,174
120
- SinaTools-0.1.29.dist-info/LICENSE,sha256=uwsKYG4TayHXNANWdpfMN2lVW4dimxQjA_7vuCVhD70,1088
121
- SinaTools-0.1.29.dist-info/METADATA,sha256=IorbBd2klVKi0amBxKMKIEgyJHjRxcpJNxAQBgyNn04,3267
122
- SinaTools-0.1.29.dist-info/WHEEL,sha256=6T3TYZE4YFi2HTS1BeZHNXAi8N52OZT4O-dJ6-ome_4,116
123
- SinaTools-0.1.29.dist-info/entry_points.txt,sha256=ZwZLolnWog2fjdDrfaHNHob8SE_YtMbD6ayzsOzItxs,1234
124
- SinaTools-0.1.29.dist-info/top_level.txt,sha256=8tNdPTeJKw3TQCaua8IJIx6N6WpgZZmVekf1OdBNJpE,10
125
- SinaTools-0.1.29.dist-info/RECORD,,
119
+ SinaTools-0.1.30.dist-info/AUTHORS.rst,sha256=aTWeWlIdfLi56iLJfIUAwIrmqDcgxXKLji75_Fjzjyg,174
120
+ SinaTools-0.1.30.dist-info/LICENSE,sha256=uwsKYG4TayHXNANWdpfMN2lVW4dimxQjA_7vuCVhD70,1088
121
+ SinaTools-0.1.30.dist-info/METADATA,sha256=Pl7OjoUAbjqPtzJuGqvXeb7GVCx2t-7cxW4-APUKLIk,3267
122
+ SinaTools-0.1.30.dist-info/WHEEL,sha256=6T3TYZE4YFi2HTS1BeZHNXAi8N52OZT4O-dJ6-ome_4,116
123
+ SinaTools-0.1.30.dist-info/entry_points.txt,sha256=ZwZLolnWog2fjdDrfaHNHob8SE_YtMbD6ayzsOzItxs,1234
124
+ SinaTools-0.1.30.dist-info/top_level.txt,sha256=8tNdPTeJKw3TQCaua8IJIx6N6WpgZZmVekf1OdBNJpE,10
125
+ SinaTools-0.1.30.dist-info/RECORD,,
@@ -10,13 +10,14 @@ urls = {
10
10
  'ner': 'https://sina.birzeit.edu/Wj27012000.tar.gz',
11
11
  'wsd_model': 'https://sina.birzeit.edu/bert-base-arabertv02_22_May_2021_00h_allglosses_unused01.zip',
12
12
  'wsd_tokenizer': 'https://sina.birzeit.edu/bert-base-arabertv02.zip',
13
- 'glosses_dic': 'https://sina.birzeit.edu/glosses_dic.pickle',
13
+ 'one_gram': 'https://sina.birzeit.edu/one_gram.pickle',
14
14
  'five_grams': 'https://sina.birzeit.edu/five_grams.pickle',
15
15
  'four_grams':'https://sina.birzeit.edu/four_grams.pickle',
16
16
  'three_grams':'https://sina.birzeit.edu/three_grams.pickle',
17
17
  'two_grams':'https://sina.birzeit.edu/two_grams.pickle',
18
- 'synonyms_level2':'https://sina.birzeit.edu/graph_l2.pkl',
19
- 'synonyms_level3':'https://sina.birzeit.edu/graph_l3.pkl'
18
+ 'graph_l2':'https://sina.birzeit.edu/graph_l2.pkl',
19
+ 'graph_l3':'https://sina.birzeit.edu/graph_l3.pkl',
20
+ 'relation':'https://sina.birzeit.edu/relation_model.zip'
20
21
  }
21
22
 
22
23
  def get_appdatadir():
sinatools/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.29
1
+ 0.1.30
@@ -3,6 +3,7 @@ from sinatools.utils.tokenizers_words import simple_word_tokenize
3
3
  from sinatools.utils.parser import arStrip
4
4
  from sinatools.utils.charsets import AR_CHARSET, AR_DIAC_CHARSET
5
5
  from sinatools.DataDownload.downloader import get_appdatadir
6
+ from sinatools.morphology.morph_analyzer import remove_punctuation
6
7
  from . import dictionary
7
8
 
8
9
  _IS_AR_RE = re.compile(u'^[' + re.escape(u''.join(AR_CHARSET)) + u']+$')
@@ -98,13 +99,16 @@ def analyze(text, language ='MSA', task ='full', flag="1"):
98
99
  token = arStrip(token , False , True , False , False , False , False)
99
100
  token = re.sub('[ٱ]','ﺍ',token)
100
101
  # token, freq, lemma, lemma_id, root, pos
101
- solution = [token, 0, token+"_0", 0, token, ""]
102
+ solution = [token, 0, token, 0, token, ""]
102
103
 
103
104
  if token.isdigit():
104
- solution[5] = "digit" #pos
105
+ solution[5] = "رقم" #pos
106
+
107
+ elif remove_punctuation(token).strip() == "":
108
+ solution[5] = "علامة ترقيم" #pos
105
109
 
106
110
  elif not _is_ar(token):
107
- solution[5] = "Foreign" #pos
111
+ solution[5] = "أجنبي" #pos
108
112
 
109
113
  else:
110
114
  result_token = find_solution(token,language,flag)
sinatools/ner/__init__.py CHANGED
@@ -39,5 +39,4 @@ train_config.trainer_config["kwargs"]["model"] = model
39
39
  tagger = load_object(train_config.trainer_config["fn"], train_config.trainer_config["kwargs"])
40
40
  tagger.load(os.path.join(model_path,"checkpoints"))
41
41
 
42
- pipe = pipeline("sentiment-analysis", model= os.path.join(path, "best_model"), return_all_scores =True, max_length=128, truncation=True)
43
- #pipe = AutoModelForSequenceClassification.from_pretrained(os.path.join(path, "best_model"))
42
+ pipe = pipeline("sentiment-analysis", model= os.path.join(path, "relation_model"), return_all_scores =True, max_length=128, truncation=True)