SinaTools 0.1.41__py2.py3-none-any.whl → 1.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {SinaTools-0.1.41.dist-info → SinaTools-1.0.1.dist-info}/METADATA +1 -1
  2. SinaTools-1.0.1.dist-info/RECORD +73 -0
  3. sinatools/VERSION +1 -1
  4. sinatools/ner/trainers/BertNestedTrainer.py +203 -203
  5. sinatools/ner/trainers/BertTrainer.py +163 -163
  6. sinatools/ner/trainers/__init__.py +2 -2
  7. SinaTools-0.1.41.dist-info/RECORD +0 -123
  8. sinatools/arabert/arabert/__init__.py +0 -14
  9. sinatools/arabert/arabert/create_classification_data.py +0 -260
  10. sinatools/arabert/arabert/create_pretraining_data.py +0 -534
  11. sinatools/arabert/arabert/extract_features.py +0 -444
  12. sinatools/arabert/arabert/lamb_optimizer.py +0 -158
  13. sinatools/arabert/arabert/modeling.py +0 -1027
  14. sinatools/arabert/arabert/optimization.py +0 -202
  15. sinatools/arabert/arabert/run_classifier.py +0 -1078
  16. sinatools/arabert/arabert/run_pretraining.py +0 -593
  17. sinatools/arabert/arabert/run_squad.py +0 -1440
  18. sinatools/arabert/arabert/tokenization.py +0 -414
  19. sinatools/arabert/araelectra/__init__.py +0 -1
  20. sinatools/arabert/araelectra/build_openwebtext_pretraining_dataset.py +0 -103
  21. sinatools/arabert/araelectra/build_pretraining_dataset.py +0 -230
  22. sinatools/arabert/araelectra/build_pretraining_dataset_single_file.py +0 -90
  23. sinatools/arabert/araelectra/configure_finetuning.py +0 -172
  24. sinatools/arabert/araelectra/configure_pretraining.py +0 -143
  25. sinatools/arabert/araelectra/finetune/__init__.py +0 -14
  26. sinatools/arabert/araelectra/finetune/feature_spec.py +0 -56
  27. sinatools/arabert/araelectra/finetune/preprocessing.py +0 -173
  28. sinatools/arabert/araelectra/finetune/scorer.py +0 -54
  29. sinatools/arabert/araelectra/finetune/task.py +0 -74
  30. sinatools/arabert/araelectra/finetune/task_builder.py +0 -70
  31. sinatools/arabert/araelectra/flops_computation.py +0 -215
  32. sinatools/arabert/araelectra/model/__init__.py +0 -14
  33. sinatools/arabert/araelectra/model/modeling.py +0 -1029
  34. sinatools/arabert/araelectra/model/optimization.py +0 -193
  35. sinatools/arabert/araelectra/model/tokenization.py +0 -355
  36. sinatools/arabert/araelectra/pretrain/__init__.py +0 -14
  37. sinatools/arabert/araelectra/pretrain/pretrain_data.py +0 -160
  38. sinatools/arabert/araelectra/pretrain/pretrain_helpers.py +0 -229
  39. sinatools/arabert/araelectra/run_finetuning.py +0 -323
  40. sinatools/arabert/araelectra/run_pretraining.py +0 -469
  41. sinatools/arabert/araelectra/util/__init__.py +0 -14
  42. sinatools/arabert/araelectra/util/training_utils.py +0 -112
  43. sinatools/arabert/araelectra/util/utils.py +0 -109
  44. sinatools/arabert/aragpt2/__init__.py +0 -2
  45. sinatools/arabert/aragpt2/create_pretraining_data.py +0 -95
  46. sinatools/arabert/aragpt2/gpt2/__init__.py +0 -2
  47. sinatools/arabert/aragpt2/gpt2/lamb_optimizer.py +0 -158
  48. sinatools/arabert/aragpt2/gpt2/optimization.py +0 -225
  49. sinatools/arabert/aragpt2/gpt2/run_pretraining.py +0 -397
  50. sinatools/arabert/aragpt2/grover/__init__.py +0 -0
  51. sinatools/arabert/aragpt2/grover/dataloader.py +0 -161
  52. sinatools/arabert/aragpt2/grover/modeling.py +0 -803
  53. sinatools/arabert/aragpt2/grover/modeling_gpt2.py +0 -1196
  54. sinatools/arabert/aragpt2/grover/optimization_adafactor.py +0 -234
  55. sinatools/arabert/aragpt2/grover/train_tpu.py +0 -187
  56. sinatools/arabert/aragpt2/grover/utils.py +0 -234
  57. sinatools/arabert/aragpt2/train_bpe_tokenizer.py +0 -59
  58. {SinaTools-0.1.41.data → SinaTools-1.0.1.data}/data/sinatools/environment.yml +0 -0
  59. {SinaTools-0.1.41.dist-info → SinaTools-1.0.1.dist-info}/AUTHORS.rst +0 -0
  60. {SinaTools-0.1.41.dist-info → SinaTools-1.0.1.dist-info}/LICENSE +0 -0
  61. {SinaTools-0.1.41.dist-info → SinaTools-1.0.1.dist-info}/WHEEL +0 -0
  62. {SinaTools-0.1.41.dist-info → SinaTools-1.0.1.dist-info}/entry_points.txt +0 -0
  63. {SinaTools-0.1.41.dist-info → SinaTools-1.0.1.dist-info}/top_level.txt +0 -0
@@ -1,163 +1,163 @@
1
- import os
2
- import logging
3
- import torch
4
- import numpy as np
5
- from sinatools.ner.trainers import BaseTrainer
6
- from sinatools.ner.metrics import compute_single_label_metrics
7
-
8
- logger = logging.getLogger(__name__)
9
-
10
-
11
- class BertTrainer(BaseTrainer):
12
- def __init__(self, **kwargs):
13
- super().__init__(**kwargs)
14
-
15
- def train(self):
16
- best_val_loss, test_loss = np.inf, np.inf
17
- num_train_batch = len(self.train_dataloader)
18
- patience = self.patience
19
-
20
- for epoch_index in range(self.max_epochs):
21
- self.current_epoch = epoch_index
22
- train_loss = 0
23
-
24
- for batch_index, (_, gold_tags, _, _, logits) in enumerate(self.tag(
25
- self.train_dataloader, is_train=True
26
- ), 1):
27
- self.current_timestep += 1
28
- batch_loss = self.loss(logits.view(-1, logits.shape[-1]), gold_tags.view(-1))
29
- batch_loss.backward()
30
-
31
- # Avoid exploding gradient by doing gradient clipping
32
- torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
33
-
34
- self.optimizer.step()
35
- self.scheduler.step()
36
- train_loss += batch_loss.item()
37
-
38
- if self.current_timestep % self.log_interval == 0:
39
- logger.info(
40
- "Epoch %d | Batch %d/%d | Timestep %d | LR %.10f | Loss %f",
41
- epoch_index,
42
- batch_index,
43
- num_train_batch,
44
- self.current_timestep,
45
- self.optimizer.param_groups[0]['lr'],
46
- batch_loss.item()
47
- )
48
-
49
- train_loss /= num_train_batch
50
-
51
- logger.info("** Evaluating on validation dataset **")
52
- val_preds, segments, valid_len, val_loss = self.eval(self.val_dataloader)
53
- val_metrics = compute_single_label_metrics(segments)
54
-
55
- epoch_summary_loss = {
56
- "train_loss": train_loss,
57
- "val_loss": val_loss
58
- }
59
- epoch_summary_metrics = {
60
- "val_micro_f1": val_metrics.micro_f1,
61
- "val_precision": val_metrics.precision,
62
- "val_recall": val_metrics.recall
63
- }
64
-
65
- logger.info(
66
- "Epoch %d | Timestep %d | Train Loss %f | Val Loss %f | F1 %f",
67
- epoch_index,
68
- self.current_timestep,
69
- train_loss,
70
- val_loss,
71
- val_metrics.micro_f1
72
- )
73
-
74
- if val_loss < best_val_loss:
75
- patience = self.patience
76
- best_val_loss = val_loss
77
- logger.info("** Validation improved, evaluating test data **")
78
- test_preds, segments, valid_len, test_loss = self.eval(self.test_dataloader)
79
- self.segments_to_file(segments, os.path.join(self.output_path, "predictions.txt"))
80
- test_metrics = compute_single_label_metrics(segments)
81
-
82
- epoch_summary_loss["test_loss"] = test_loss
83
- epoch_summary_metrics["test_micro_f1"] = test_metrics.micro_f1
84
- epoch_summary_metrics["test_precision"] = test_metrics.precision
85
- epoch_summary_metrics["test_recall"] = test_metrics.recall
86
-
87
- logger.info(
88
- f"Epoch %d | Timestep %d | Test Loss %f | F1 %f",
89
- epoch_index,
90
- self.current_timestep,
91
- test_loss,
92
- test_metrics.micro_f1
93
- )
94
-
95
- self.save()
96
- else:
97
- patience -= 1
98
-
99
- # No improvements, terminating early
100
- if patience == 0:
101
- logger.info("Early termination triggered")
102
- break
103
-
104
- self.summary_writer.add_scalars("Loss", epoch_summary_loss, global_step=self.current_timestep)
105
- self.summary_writer.add_scalars("Metrics", epoch_summary_metrics, global_step=self.current_timestep)
106
-
107
- def eval(self, dataloader):
108
- golds, preds, segments, valid_lens = list(), list(), list(), list()
109
- loss = 0
110
-
111
- for _, gold_tags, tokens, valid_len, logits in self.tag(
112
- dataloader, is_train=False
113
- ):
114
- loss += self.loss(logits.view(-1, logits.shape[-1]), gold_tags.view(-1))
115
- preds += torch.argmax(logits, dim=2).detach().cpu().numpy().tolist()
116
- segments += tokens
117
- valid_lens += list(valid_len)
118
-
119
- loss /= len(dataloader)
120
-
121
- # Update segments, attach predicted tags to each token
122
- segments = self.to_segments(segments, preds, valid_lens, dataloader.dataset.vocab)
123
-
124
- return preds, segments, valid_lens, loss.item()
125
-
126
- def infer(self, dataloader):
127
- golds, preds, segments, valid_lens = list(), list(), list(), list()
128
-
129
- for _, gold_tags, tokens, valid_len, logits in self.tag(
130
- dataloader, is_train=False
131
- ):
132
- preds += torch.argmax(logits, dim=2).detach().cpu().numpy().tolist()
133
- segments += tokens
134
- valid_lens += list(valid_len)
135
-
136
- segments = self.to_segments(segments, preds, valid_lens, dataloader.dataset.vocab)
137
- return segments
138
-
139
- def to_segments(self, segments, preds, valid_lens, vocab):
140
- if vocab is None:
141
- vocab = self.vocab
142
-
143
- tagged_segments = list()
144
- tokens_stoi = vocab.tokens.get_stoi()
145
- tags_itos = vocab.tags[0].get_itos()
146
- unk_id = tokens_stoi["UNK"]
147
-
148
- for segment, pred, valid_len in zip(segments, preds, valid_lens):
149
- # First, the token at 0th index [CLS] and token at nth index [SEP]
150
- # Combine the tokens with their corresponding predictions
151
- segment_pred = zip(segment[1:valid_len-1], pred[1:valid_len-1])
152
-
153
- # Ignore the sub-tokens/subwords, which are identified with text being UNK
154
- segment_pred = list(filter(lambda t: tokens_stoi[t[0].text] != unk_id, segment_pred))
155
-
156
- # Attach the predicted tags to each token
157
- list(map(lambda t: setattr(t[0], 'pred_tag', [{"tag": tags_itos[t[1]]}]), segment_pred))
158
-
159
- # We are only interested in the tagged tokens, we do no longer need raw model predictions
160
- tagged_segment = [t for t, _ in segment_pred]
161
- tagged_segments.append(tagged_segment)
162
-
163
- return tagged_segments
1
+ import os
2
+ import logging
3
+ import torch
4
+ import numpy as np
5
+ from sinatools.ner.trainers import BaseTrainer
6
+ from sinatools.ner.metrics import compute_single_label_metrics
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ class BertTrainer(BaseTrainer):
12
+ def __init__(self, **kwargs):
13
+ super().__init__(**kwargs)
14
+
15
+ def train(self):
16
+ best_val_loss, test_loss = np.inf, np.inf
17
+ num_train_batch = len(self.train_dataloader)
18
+ patience = self.patience
19
+
20
+ for epoch_index in range(self.max_epochs):
21
+ self.current_epoch = epoch_index
22
+ train_loss = 0
23
+
24
+ for batch_index, (_, gold_tags, _, _, logits) in enumerate(self.tag(
25
+ self.train_dataloader, is_train=True
26
+ ), 1):
27
+ self.current_timestep += 1
28
+ batch_loss = self.loss(logits.view(-1, logits.shape[-1]), gold_tags.view(-1))
29
+ batch_loss.backward()
30
+
31
+ # Avoid exploding gradient by doing gradient clipping
32
+ torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.clip)
33
+
34
+ self.optimizer.step()
35
+ self.scheduler.step()
36
+ train_loss += batch_loss.item()
37
+
38
+ if self.current_timestep % self.log_interval == 0:
39
+ logger.info(
40
+ "Epoch %d | Batch %d/%d | Timestep %d | LR %.10f | Loss %f",
41
+ epoch_index,
42
+ batch_index,
43
+ num_train_batch,
44
+ self.current_timestep,
45
+ self.optimizer.param_groups[0]['lr'],
46
+ batch_loss.item()
47
+ )
48
+
49
+ train_loss /= num_train_batch
50
+
51
+ logger.info("** Evaluating on validation dataset **")
52
+ val_preds, segments, valid_len, val_loss = self.eval(self.val_dataloader)
53
+ val_metrics = compute_single_label_metrics(segments)
54
+
55
+ epoch_summary_loss = {
56
+ "train_loss": train_loss,
57
+ "val_loss": val_loss
58
+ }
59
+ epoch_summary_metrics = {
60
+ "val_micro_f1": val_metrics.micro_f1,
61
+ "val_precision": val_metrics.precision,
62
+ "val_recall": val_metrics.recall
63
+ }
64
+
65
+ logger.info(
66
+ "Epoch %d | Timestep %d | Train Loss %f | Val Loss %f | F1 %f",
67
+ epoch_index,
68
+ self.current_timestep,
69
+ train_loss,
70
+ val_loss,
71
+ val_metrics.micro_f1
72
+ )
73
+
74
+ if val_loss < best_val_loss:
75
+ patience = self.patience
76
+ best_val_loss = val_loss
77
+ logger.info("** Validation improved, evaluating test data **")
78
+ test_preds, segments, valid_len, test_loss = self.eval(self.test_dataloader)
79
+ self.segments_to_file(segments, os.path.join(self.output_path, "predictions.txt"))
80
+ test_metrics = compute_single_label_metrics(segments)
81
+
82
+ epoch_summary_loss["test_loss"] = test_loss
83
+ epoch_summary_metrics["test_micro_f1"] = test_metrics.micro_f1
84
+ epoch_summary_metrics["test_precision"] = test_metrics.precision
85
+ epoch_summary_metrics["test_recall"] = test_metrics.recall
86
+
87
+ logger.info(
88
+ f"Epoch %d | Timestep %d | Test Loss %f | F1 %f",
89
+ epoch_index,
90
+ self.current_timestep,
91
+ test_loss,
92
+ test_metrics.micro_f1
93
+ )
94
+
95
+ self.save()
96
+ else:
97
+ patience -= 1
98
+
99
+ # No improvements, terminating early
100
+ if patience == 0:
101
+ logger.info("Early termination triggered")
102
+ break
103
+
104
+ self.summary_writer.add_scalars("Loss", epoch_summary_loss, global_step=self.current_timestep)
105
+ self.summary_writer.add_scalars("Metrics", epoch_summary_metrics, global_step=self.current_timestep)
106
+
107
+ def eval(self, dataloader):
108
+ golds, preds, segments, valid_lens = list(), list(), list(), list()
109
+ loss = 0
110
+
111
+ for _, gold_tags, tokens, valid_len, logits in self.tag(
112
+ dataloader, is_train=False
113
+ ):
114
+ loss += self.loss(logits.view(-1, logits.shape[-1]), gold_tags.view(-1))
115
+ preds += torch.argmax(logits, dim=2).detach().cpu().numpy().tolist()
116
+ segments += tokens
117
+ valid_lens += list(valid_len)
118
+
119
+ loss /= len(dataloader)
120
+
121
+ # Update segments, attach predicted tags to each token
122
+ segments = self.to_segments(segments, preds, valid_lens, dataloader.dataset.vocab)
123
+
124
+ return preds, segments, valid_lens, loss.item()
125
+
126
+ def infer(self, dataloader):
127
+ golds, preds, segments, valid_lens = list(), list(), list(), list()
128
+
129
+ for _, gold_tags, tokens, valid_len, logits in self.tag(
130
+ dataloader, is_train=False
131
+ ):
132
+ preds += torch.argmax(logits, dim=2).detach().cpu().numpy().tolist()
133
+ segments += tokens
134
+ valid_lens += list(valid_len)
135
+
136
+ segments = self.to_segments(segments, preds, valid_lens, dataloader.dataset.vocab)
137
+ return segments
138
+
139
+ def to_segments(self, segments, preds, valid_lens, vocab):
140
+ if vocab is None:
141
+ vocab = self.vocab
142
+
143
+ tagged_segments = list()
144
+ tokens_stoi = vocab.tokens.get_stoi()
145
+ tags_itos = vocab.tags[0].get_itos()
146
+ unk_id = tokens_stoi["UNK"]
147
+
148
+ for segment, pred, valid_len in zip(segments, preds, valid_lens):
149
+ # First, the token at 0th index [CLS] and token at nth index [SEP]
150
+ # Combine the tokens with their corresponding predictions
151
+ segment_pred = zip(segment[1:valid_len-1], pred[1:valid_len-1])
152
+
153
+ # Ignore the sub-tokens/subwords, which are identified with text being UNK
154
+ segment_pred = list(filter(lambda t: tokens_stoi[t[0].text] != unk_id, segment_pred))
155
+
156
+ # Attach the predicted tags to each token
157
+ list(map(lambda t: setattr(t[0], 'pred_tag', [{"tag": tags_itos[t[1]]}]), segment_pred))
158
+
159
+ # We are only interested in the tagged tokens, we do no longer need raw model predictions
160
+ tagged_segment = [t for t, _ in segment_pred]
161
+ tagged_segments.append(tagged_segment)
162
+
163
+ return tagged_segments
@@ -1,3 +1,3 @@
1
- from sinatools.ner.trainers.BaseTrainer import BaseTrainer
2
- from sinatools.ner.trainers.BertTrainer import BertTrainer
1
+ from sinatools.ner.trainers.BaseTrainer import BaseTrainer
2
+ from sinatools.ner.trainers.BertTrainer import BertTrainer
3
3
  from sinatools.ner.trainers.BertNestedTrainer import BertNestedTrainer
@@ -1,123 +0,0 @@
1
- SinaTools-0.1.41.data/data/sinatools/environment.yml,sha256=i0UFZc-vwU9ZwnI8hBdz7vi-x22vG-HR8ojWBUAOkno,5422
2
- sinatools/VERSION,sha256=MoDOfoT2nFdWJzX_rU2mpf_gRhWTZlYybGVGv8AKRLk,6
3
- sinatools/__init__.py,sha256=bEosTU1o-FSpyytS6iVP_82BXHF2yHnzpJxPLYRbeII,135
4
- sinatools/environment.yml,sha256=i0UFZc-vwU9ZwnI8hBdz7vi-x22vG-HR8ojWBUAOkno,5422
5
- sinatools/install_env.py,sha256=EODeeE0ZzfM_rz33_JSIruX03Nc4ghyVOM5BHVhsZaQ,404
6
- sinatools/sinatools.py,sha256=vR5AaF0iel21LvsdcqwheoBz0SIj9K9I_Ub8M8oA98Y,20
7
- sinatools/CLI/DataDownload/download_files.py,sha256=EezvbukR3pZ8s6mGZnzTcjsbo3CBDlC0g6KhJWlYp1w,2686
8
- sinatools/CLI/morphology/ALMA_multi_word.py,sha256=rmpa72twwIJHme_kpQ1lu3_7y_Jorj70QTvOnQMJRuI,1274
9
- sinatools/CLI/morphology/morph_analyzer.py,sha256=HPamEKos_JRYCJv_2q6c12N--da58_JXTno9haww5Ao,3497
10
- sinatools/CLI/ner/corpus_entity_extractor.py,sha256=DdvigsDQzko5nJBjzUXlIDqoBMBTVzktjSo7JfEXTIA,4778
11
- sinatools/CLI/ner/entity_extractor.py,sha256=G9j-t0WKm2CRORhqARJM-pI-KArQ2IXIvnBK_NHxlHs,2885
12
- sinatools/CLI/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
- sinatools/CLI/utils/arStrip.py,sha256=NLyp8vOu2xv80tL9jiKRvyptmbkRZVg-wcAr-9YyvNY,3264
14
- sinatools/CLI/utils/corpus_tokenizer.py,sha256=nH0T4h6urr_0Qy6-wN3PquOtnwybj0REde5Ts_OE4U8,1650
15
- sinatools/CLI/utils/implication.py,sha256=AojpkCwUQJiQjxhyEUWKRHmBnIt1tVqr485cAF7Thq0,2857
16
- sinatools/CLI/utils/jaccard.py,sha256=w56N_cNEFJ0A7WtunmY_xtms4srFagKBzrW_0YhH2DE,4216
17
- sinatools/CLI/utils/remove_latin.py,sha256=NOaTm2RHxt5IQrV98ySTmD8rTXTmcqSmfbPAwTyaXqU,848
18
- sinatools/CLI/utils/remove_punctuation.py,sha256=vJAZlEn7WGftZAFVFYnddkRrxdJ_rMmKB9vFZkY-jN4,1097
19
- sinatools/CLI/utils/sentence_tokenizer.py,sha256=Wli8eiDbWSd_Z8UKpu_JkaS8jImowa1vnRL0oYCSfqw,2823
20
- sinatools/CLI/utils/text_dublication_detector.py,sha256=dW70O5O20GxeUDDF6zVYn52wWLmJF-HBZgvqIeVL2rQ,1661
21
- sinatools/CLI/utils/text_transliteration.py,sha256=vz-3kxWf8pNYVCqNAtBAiA6u_efrS5NtWT-ofN1NX6I,2014
22
- sinatools/DataDownload/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- sinatools/DataDownload/downloader.py,sha256=VdUNgSqMKz1J-DuQD_eS1U2KWqEpy94WlSJ0pPODLig,7833
24
- sinatools/arabert/__init__.py,sha256=ely2PttjgSv7vKdzskuD1rtK_l_UOpmxJSz8isrveD0,16
25
- sinatools/arabert/preprocess.py,sha256=qI0FsuMTOzdRlYGCtLrjpXgikNElUZPv9bnjaKDZKJ4,33024
26
- sinatools/arabert/arabert/__init__.py,sha256=KbSAH-XqbRygn0y59m5-ZYOLXgpT1gSgE3F-qd4rKEc,627
27
- sinatools/arabert/arabert/create_classification_data.py,sha256=BhemGNRbYz_Pun0Q5WerN2-9n-ILmU3tm4J-OlHw5-A,7678
28
- sinatools/arabert/arabert/create_pretraining_data.py,sha256=2M-cF3CLHbQ0cdWrzFT6Frg1vVP4Y-CFoq8iEPyxgsE,18924
29
- sinatools/arabert/arabert/extract_features.py,sha256=C1IzASrlX7u4_M2xdr_PjzWfTRZgklhUXA2WHKgQt-I,15585
30
- sinatools/arabert/arabert/lamb_optimizer.py,sha256=uN3Dcx-6n2_OwepyymRrGrB4EcSkR8b2ZczZrOr7bpY,6263
31
- sinatools/arabert/arabert/modeling.py,sha256=KliecCmA1pP3owg0mYge6On3IRHunMF5kMLuEwc0VLw,40896
32
- sinatools/arabert/arabert/optimization.py,sha256=Wx0Js6Zsfc3iVw-_7Q1SCnxfP_qqbdTAyFD-vZSpOyk,8153
33
- sinatools/arabert/arabert/run_classifier.py,sha256=AdVGyvidlmbEp12b-PauiBo6EmFLEO7tqeJKuLhK2DA,38777
34
- sinatools/arabert/arabert/run_pretraining.py,sha256=yO16nKkHDfcYA2Zx7vv8KN4te6_1qFOzyVeDzFT-DQw,21894
35
- sinatools/arabert/arabert/run_squad.py,sha256=PORxgiByP8L6vZqAFkqgHPJ_ZjAlqlg64gtkdLmDNns,53456
36
- sinatools/arabert/arabert/tokenization.py,sha256=R6xkyCb8_vgeksXiLeqDvV5vOnLb1cPNsvfDij6YVFk,14132
37
- sinatools/arabert/araelectra/__init__.py,sha256=ely2PttjgSv7vKdzskuD1rtK_l_UOpmxJSz8isrveD0,16
38
- sinatools/arabert/araelectra/build_openwebtext_pretraining_dataset.py,sha256=pIo6VFT3XXOYroZaab3msZAP6XjCKu0KcrIZQA0Pj8U,3881
39
- sinatools/arabert/araelectra/build_pretraining_dataset.py,sha256=Z8ZmKznaE_2SPDRoPYR1SDhjTN_NTpNCFFuhUkykwl8,9041
40
- sinatools/arabert/araelectra/build_pretraining_dataset_single_file.py,sha256=W7HFr1XoO6bCDR7X7w-bOuwULFtTSjeKbJ2LHzzHf9k,3224
41
- sinatools/arabert/araelectra/configure_finetuning.py,sha256=YfGLMdgN6Qqm357Mzy5UMjkuLPPWtBs7f4dA-DKE6JM,7768
42
- sinatools/arabert/araelectra/configure_pretraining.py,sha256=oafQgu4WmVdxBcU5mSfXhPlvCk43CJwAWXC10Q58BlI,5801
43
- sinatools/arabert/araelectra/flops_computation.py,sha256=krHTeuPH9xQu5ldprBOPJNlJRvC7fmmvXXqUjfWrzPE,9499
44
- sinatools/arabert/araelectra/run_finetuning.py,sha256=JecbrSmGikBNyid4JKRZ49Rm5xFpt02WfgIIcs3TpcU,12976
45
- sinatools/arabert/araelectra/run_pretraining.py,sha256=1K2aAFTY0p3iaLY0xkhTlm6v0B-Zun8SwEzz-K6RXM4,20665
46
- sinatools/arabert/araelectra/finetune/__init__.py,sha256=d55FZ9ZE-_t_WWMnIiRGozkTw50vBZ-s9BMy7l_I-ao,619
47
- sinatools/arabert/araelectra/finetune/feature_spec.py,sha256=cqNlBa2KK_G1-vkKm1EJUv6BoS3gesCUAHwVagZB6wM,1888
48
- sinatools/arabert/araelectra/finetune/preprocessing.py,sha256=1mf7-IxknCRsobQZ-VV1zs4Cwt-mfOtoVxysDJa9LZ0,6657
49
- sinatools/arabert/araelectra/finetune/scorer.py,sha256=PjRg0P5ANCtul2ute7ccq3mRCCoIAoCb-lVLlwd4rVY,1571
50
- sinatools/arabert/araelectra/finetune/task.py,sha256=zM8M4PGSIrY2u6ytpmkQEXxG-jjoeN9wouEyVR23qeQ,1991
51
- sinatools/arabert/araelectra/finetune/task_builder.py,sha256=Zsoiuw5M3Ca8QhaZVLVLZyWw09K5R75UeMuPmazMlHI,2768
52
- sinatools/arabert/araelectra/model/__init__.py,sha256=d55FZ9ZE-_t_WWMnIiRGozkTw50vBZ-s9BMy7l_I-ao,619
53
- sinatools/arabert/araelectra/model/modeling.py,sha256=5XLIutnmr-SFQOV_XntJ-U5evSCY-J2e9NjvlwVXKkk,40877
54
- sinatools/arabert/araelectra/model/optimization.py,sha256=BCMb_C5hgBw7wC9ZR8AQ4lwoPopqLIcSiqcCrIjx9XU,7254
55
- sinatools/arabert/araelectra/model/tokenization.py,sha256=9CkyPzs3L6OEPzN-7EWQDNQmW2mIJoZD4o1rn6xLdL4,11082
56
- sinatools/arabert/araelectra/pretrain/__init__.py,sha256=d55FZ9ZE-_t_WWMnIiRGozkTw50vBZ-s9BMy7l_I-ao,619
57
- sinatools/arabert/araelectra/pretrain/pretrain_data.py,sha256=NLgIcLAq1-MgtBNXYu_isDxnOY5k67SyADYy-8nzBok,5413
58
- sinatools/arabert/araelectra/pretrain/pretrain_helpers.py,sha256=nFl7LEdxAU5kKwiodqJHzi-ty9jMFsCCNYOF__A69j8,9255
59
- sinatools/arabert/araelectra/util/__init__.py,sha256=d55FZ9ZE-_t_WWMnIiRGozkTw50vBZ-s9BMy7l_I-ao,619
60
- sinatools/arabert/araelectra/util/training_utils.py,sha256=7h_J1ljUWM0ynBcofEtjZWL_oAfZtTxEemQLkixgn-0,4142
61
- sinatools/arabert/araelectra/util/utils.py,sha256=G0UAETUCZMlU9R9ASD9AXrWZeodWI1aZJEE9F-goaH4,2591
62
- sinatools/arabert/aragpt2/__init__.py,sha256=aQkKhQwWaS61wYEeOdx682upeMWFPUjLxXSs7JM1sOA,18
63
- sinatools/arabert/aragpt2/create_pretraining_data.py,sha256=fFa2_DAyTwc8L2IqQbshsh_Ia26nj1qtVLzC6DooSac,3105
64
- sinatools/arabert/aragpt2/train_bpe_tokenizer.py,sha256=b-8zHQ02fLmZV4GfjnrPptwjpX259F41SlnWzBrflMA,1888
65
- sinatools/arabert/aragpt2/gpt2/__init__.py,sha256=aQkKhQwWaS61wYEeOdx682upeMWFPUjLxXSs7JM1sOA,18
66
- sinatools/arabert/aragpt2/gpt2/lamb_optimizer.py,sha256=uN3Dcx-6n2_OwepyymRrGrB4EcSkR8b2ZczZrOr7bpY,6263
67
- sinatools/arabert/aragpt2/gpt2/optimization.py,sha256=iqh23cypRSRUt53wt2G5SbNNpJMwERM7gZAOKVh5l4U,8411
68
- sinatools/arabert/aragpt2/gpt2/run_pretraining.py,sha256=4jjkUbvTO1DHoKJ89yKtlkkofcND_fyAunQ-mlnJhTM,13298
69
- sinatools/arabert/aragpt2/grover/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
70
- sinatools/arabert/aragpt2/grover/dataloader.py,sha256=-FWPTjtsvweEE1WaWRHBXfOSbsGiUmnXT3qK7KJP8cM,6853
71
- sinatools/arabert/aragpt2/grover/modeling.py,sha256=XcUvFwqRaxAwWiJstrH2FPBvDJe03pTWIyipdMfWj9g,38280
72
- sinatools/arabert/aragpt2/grover/modeling_gpt2.py,sha256=WFpCWn1792yATFzt8rZ0rpWvExfbLzV2BqiEs7llFUw,51602
73
- sinatools/arabert/aragpt2/grover/optimization_adafactor.py,sha256=1geOsCWuv5xxtSnKDz9a8aY5SVwZ1MGq-xVQDBg4Gpg,9765
74
- sinatools/arabert/aragpt2/grover/train_tpu.py,sha256=qNgLI_j6-KYkTMJfVoFlh4NIKweY1aPz1XPDw6odld0,7102
75
- sinatools/arabert/aragpt2/grover/utils.py,sha256=V5wMUxK03r5g_pb7R3_uGLOPqQJfbIB0VaJ8ZDM4XAo,8473
76
- sinatools/morphology/ALMA_multi_word.py,sha256=hj_-8ojrYYHnfCGk8WKtJdUR8mauzQdma4WUm-okDps,1346
77
- sinatools/morphology/__init__.py,sha256=I4wVBh8BhyNl-CySVdiI_nUSn6gj1j-gmLKP300RpE0,1216
78
- sinatools/morphology/morph_analyzer.py,sha256=JOH2UWKNQWo5UzpWNzP9R1D3B3qLSogIiMp8n0N_56o,7177
79
- sinatools/ner/__init__.py,sha256=59kLMX6UQhF6JpE10RhaDYC3a2_jiWOIVPuejsoflFE,1050
80
- sinatools/ner/data_format.py,sha256=VmFshZbEPOsWxsb4tgSkwvbM1k7yCce4kmtPkCiWgwM,4513
81
- sinatools/ner/datasets.py,sha256=mG1iwqSm3lXCFHLqE-b4wNi176cpuzNBz8tKaBU6z6M,5059
82
- sinatools/ner/entity_extractor.py,sha256=O2epRwRFUUcQs3SnFIYHVBI4zVhr8hRcj0XJYeby4ts,3588
83
- sinatools/ner/helpers.py,sha256=sX6ezVbuVQxk_xJqZwhUzJVFVuVmFGmei_kd6r3sPHE,3652
84
- sinatools/ner/metrics.py,sha256=Irz6SsIvpOzGIA2lWxrEV86xnTnm0TzKm9SUVT4SXUU,2734
85
- sinatools/ner/transforms.py,sha256=vti3mDdi-IRP8i0aTQ37QqpPlP9hdMmJ6_bAMa0uL-s,4871
86
- sinatools/ner/data/__init__.py,sha256=W0C1ge_XxTfmdEGz0hkclz57aLI5VFS5t6BjByCfkFk,57
87
- sinatools/ner/data/datasets.py,sha256=_uUlvBAhnTtPwKLj0wIbmB04VCBidfwffxKorLGHq_g,5134
88
- sinatools/ner/data/transforms.py,sha256=URMz1dHzkHjgUGAkDOenCWvQThO1ha8XeQVjoLL9RXM,4874
89
- sinatools/ner/nn/BaseModel.py,sha256=3GmujQasTZZunOBuFXpY2p1W8W256iI_Uu4hxhOY2Z0,608
90
- sinatools/ner/nn/BertNestedTagger.py,sha256=_fwAn1kiKmXe6m5y16Ipty3kvXIEFEmiUq74Ad1818U,1219
91
- sinatools/ner/nn/BertSeqTagger.py,sha256=dFcBBiMw2QCWsyy7aQDe_PS3aRuNn4DOxKIHgTblFvc,504
92
- sinatools/ner/nn/__init__.py,sha256=UgQD_XLNzQGBNSYc_Bw1aRJZjq4PJsnMT1iZwnJemqE,170
93
- sinatools/ner/trainers/BaseTrainer.py,sha256=Uar8HxtgBXCVhKa85sEN622d9P7JiFBcWfs46uRG4aA,4068
94
- sinatools/ner/trainers/BertNestedTrainer.py,sha256=iJOah69tXZsAXBimqP0odEsk8SPX4A355riePzW2BFs,8632
95
- sinatools/ner/trainers/BertTrainer.py,sha256=BtttsrHPolmK3eRDqrgVUuv6lVMuImIeskxhi02Q-44,6596
96
- sinatools/ner/trainers/__init__.py,sha256=Xnbi_M4KKJRqV7FJe1vklyT0nEW2Q2obxgcWkbR0ZbA,190
97
- sinatools/relations/__init__.py,sha256=cYjsP2mlTYvAwVIEFtgA6i9gLUSkGVOuDggMs7TvG5k,272
98
- sinatools/relations/relation_extractor.py,sha256=UuDlaaR0ch9BFv4sBF1tr7P-P9xq8oRZF41tAze6_ok,9751
99
- sinatools/semantic_relatedness/__init__.py,sha256=S0xrmqtl72L02N56nbNMudPoebnYQgsaIyyX-587DsU,830
100
- sinatools/semantic_relatedness/compute_relatedness.py,sha256=_9HFPs3nQBLklHFfkc9o3gEjEI6Bd34Ha4E1Kvv1RIg,2256
101
- sinatools/synonyms/__init__.py,sha256=yMuphNZrm5XLOR2T0weOHcUysJm-JKHUmVLoLQO8390,548
102
- sinatools/synonyms/synonyms_generator.py,sha256=jRd0D3_kn-jYBaZzqY-7oOy0SFjSJ-mjM7JhsySzX58,9037
103
- sinatools/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
104
- sinatools/utils/charsets.py,sha256=rs82oZJqRqosZdTKXfFAJfJ5t4PxjMM_oAPsiWSWuwU,2817
105
- sinatools/utils/parser.py,sha256=qvHdln5R5CAv_0UOJWe0mcp8JCsGqgazoeIIkoALH88,6259
106
- sinatools/utils/readfile.py,sha256=xE4LEaCqXJIk9v37QUSSmWb-aY3UnCFUNb7uVdx3cpM,133
107
- sinatools/utils/similarity.py,sha256=HAK6OmyVnfjPm0GWL3z9s4ZoUwpZHVKxt3CeSMfqLIQ,11990
108
- sinatools/utils/text_dublication_detector.py,sha256=FeSkbfWGMQluz23H4CBHXION-walZPgjueX6AL8u_Q0,5660
109
- sinatools/utils/text_transliteration.py,sha256=F3smhr2AEJtySE6wGQsiXXOslTvSDzLivTYu0btgc10,8769
110
- sinatools/utils/tokenizer.py,sha256=nyk6lh5-p38wrU62hvh4wg7ni9ammkdqqIgcjbbBxxo,6965
111
- sinatools/utils/tokenizers_words.py,sha256=efNfOil9qDNVJ9yynk_8sqf65PsL-xtsHG7y2SZCkjQ,656
112
- sinatools/utils/word_compare.py,sha256=rS2Z74sf7R-7MTXyrFj5miRi2TnSG9OdTDp_qQYuo2Y,28200
113
- sinatools/wsd/__init__.py,sha256=mwmCUurOV42rsNRpIUP3luG0oEzeTfEx3oeDl93Oif8,306
114
- sinatools/wsd/disambiguator.py,sha256=h-3idc5rPPbMDSE_QVJAsEVkDHwzYY3L2SEPNXIdOcc,20104
115
- sinatools/wsd/settings.py,sha256=6XflVTFKD8SVySX9Wj7zYQtV26WDTcQ2-uW8-gDNHKE,747
116
- sinatools/wsd/wsd.py,sha256=gHIBUFXegoY1z3rRnIlK6TduhYq2BTa_dHakOjOlT4k,4434
117
- SinaTools-0.1.41.dist-info/AUTHORS.rst,sha256=aTWeWlIdfLi56iLJfIUAwIrmqDcgxXKLji75_Fjzjyg,174
118
- SinaTools-0.1.41.dist-info/LICENSE,sha256=uwsKYG4TayHXNANWdpfMN2lVW4dimxQjA_7vuCVhD70,1088
119
- SinaTools-0.1.41.dist-info/METADATA,sha256=9zBmOUN3RovUR57RCZeJNWwTxjSqEmLSjka3SL04KZA,3410
120
- SinaTools-0.1.41.dist-info/WHEEL,sha256=9Hm2OB-j1QcCUq9Jguht7ayGIIZBRTdOXD1qg9cCgPM,109
121
- SinaTools-0.1.41.dist-info/entry_points.txt,sha256=_CsRKM_tSCWV5hefBNUsWf9_6DrJnzFlxeAo1wm5XqY,1302
122
- SinaTools-0.1.41.dist-info/top_level.txt,sha256=8tNdPTeJKw3TQCaua8IJIx6N6WpgZZmVekf1OdBNJpE,10
123
- SinaTools-0.1.41.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- # coding=utf-8
2
- # Copyright 2018 The Google AI Language Team Authors.
3
- #
4
- # Licensed under the Apache License, Version 2.0 (the "License");
5
- # you may not use this file except in compliance with the License.
6
- # You may obtain a copy of the License at
7
- #
8
- # http://www.apache.org/licenses/LICENSE-2.0
9
- #
10
- # Unless required by applicable law or agreed to in writing, software
11
- # distributed under the License is distributed on an "AS IS" BASIS,
12
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- # See the License for the specific language governing permissions and
14
- # limitations under the License.