nltkor-1.2.14-cp311-cp311-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. nltkor/Kor_char.py +193 -0
  2. nltkor/__init__.py +16 -0
  3. nltkor/alignment/__init__.py +1315 -0
  4. nltkor/cider/__init__.py +2 -0
  5. nltkor/cider/cider.py +55 -0
  6. nltkor/cider/cider_scorer.py +207 -0
  7. nltkor/distance/__init__.py +441 -0
  8. nltkor/distance/wasserstein.py +126 -0
  9. nltkor/etc.py +22 -0
  10. nltkor/lazyimport.py +144 -0
  11. nltkor/make_requirement.py +11 -0
  12. nltkor/metrics/__init__.py +63 -0
  13. nltkor/metrics/bartscore.py +301 -0
  14. nltkor/metrics/bertscore.py +331 -0
  15. nltkor/metrics/bleu_tensor.py +20 -0
  16. nltkor/metrics/classical.py +847 -0
  17. nltkor/metrics/entment.py +24 -0
  18. nltkor/metrics/eval.py +517 -0
  19. nltkor/metrics/mauve.py +273 -0
  20. nltkor/metrics/mauve_utils.py +131 -0
  21. nltkor/misc/__init__.py +11 -0
  22. nltkor/misc/string2string_basic_functions.py +59 -0
  23. nltkor/misc/string2string_default_tokenizer.py +83 -0
  24. nltkor/misc/string2string_hash_functions.py +159 -0
  25. nltkor/misc/string2string_word_embeddings.py +503 -0
  26. nltkor/search/__init__.py +10 -0
  27. nltkor/search/classical.py +569 -0
  28. nltkor/search/faiss_search.py +787 -0
  29. nltkor/search/kobert_tokenizer.py +181 -0
  30. nltkor/sejong/__init__.py +3 -0
  31. nltkor/sejong/__pycache__/__init__.cpython-38.pyc +0 -0
  32. nltkor/sejong/__pycache__/__init__.cpython-39.pyc +0 -0
  33. nltkor/sejong/__pycache__/sejong_download.cpython-38.pyc +0 -0
  34. nltkor/sejong/__pycache__/sejong_download.cpython-39.pyc +0 -0
  35. nltkor/sejong/__pycache__/ssem.cpython-38.pyc +0 -0
  36. nltkor/sejong/__pycache__/ssem.cpython-39.pyc +0 -0
  37. nltkor/sejong/ch.py +12 -0
  38. nltkor/sejong/dict_semClassNum.txt +491 -0
  39. nltkor/sejong/layer.txt +630 -0
  40. nltkor/sejong/sejong_download.py +87 -0
  41. nltkor/sejong/ssem.py +684 -0
  42. nltkor/similarity/__init__.py +3 -0
  43. nltkor/similarity/bartscore____.py +337 -0
  44. nltkor/similarity/bertscore____.py +339 -0
  45. nltkor/similarity/classical.py +245 -0
  46. nltkor/similarity/cosine_similarity.py +175 -0
  47. nltkor/tag/__init__.py +71 -0
  48. nltkor/tag/__pycache__/__init__.cpython-38.pyc +0 -0
  49. nltkor/tag/__pycache__/__init__.cpython-39.pyc +0 -0
  50. nltkor/tag/__pycache__/espresso_tag.cpython-38.pyc +0 -0
  51. nltkor/tag/__pycache__/espresso_tag.cpython-39.pyc +0 -0
  52. nltkor/tag/espresso_tag.py +220 -0
  53. nltkor/tag/libs/__init__.py +10 -0
  54. nltkor/tag/libs/__pycache__/__init__.cpython-38.pyc +0 -0
  55. nltkor/tag/libs/__pycache__/__init__.cpython-39.pyc +0 -0
  56. nltkor/tag/libs/__pycache__/attributes.cpython-38.pyc +0 -0
  57. nltkor/tag/libs/__pycache__/attributes.cpython-39.pyc +0 -0
  58. nltkor/tag/libs/__pycache__/config.cpython-38.pyc +0 -0
  59. nltkor/tag/libs/__pycache__/config.cpython-39.pyc +0 -0
  60. nltkor/tag/libs/__pycache__/metadata.cpython-38.pyc +0 -0
  61. nltkor/tag/libs/__pycache__/metadata.cpython-39.pyc +0 -0
  62. nltkor/tag/libs/__pycache__/reader.cpython-38.pyc +0 -0
  63. nltkor/tag/libs/__pycache__/reader.cpython-39.pyc +0 -0
  64. nltkor/tag/libs/__pycache__/taggers.cpython-38.pyc +0 -0
  65. nltkor/tag/libs/__pycache__/taggers.cpython-39.pyc +0 -0
  66. nltkor/tag/libs/__pycache__/utils.cpython-38.pyc +0 -0
  67. nltkor/tag/libs/__pycache__/utils.cpython-39.pyc +0 -0
  68. nltkor/tag/libs/__pycache__/word_dictionary.cpython-38.pyc +0 -0
  69. nltkor/tag/libs/__pycache__/word_dictionary.cpython-39.pyc +0 -0
  70. nltkor/tag/libs/arguments.py +280 -0
  71. nltkor/tag/libs/attributes.py +231 -0
  72. nltkor/tag/libs/config.py +159 -0
  73. nltkor/tag/libs/metadata.py +129 -0
  74. nltkor/tag/libs/ner/__init__.py +2 -0
  75. nltkor/tag/libs/ner/__pycache__/__init__.cpython-38.pyc +0 -0
  76. nltkor/tag/libs/ner/__pycache__/__init__.cpython-39.pyc +0 -0
  77. nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-38.pyc +0 -0
  78. nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-39.pyc +0 -0
  79. nltkor/tag/libs/ner/macmorphoreader.py +7 -0
  80. nltkor/tag/libs/ner/ner_reader.py +92 -0
  81. nltkor/tag/libs/network.c +72325 -0
  82. nltkor/tag/libs/network.cpython-311-darwin.so +0 -0
  83. nltkor/tag/libs/network.pyx +878 -0
  84. nltkor/tag/libs/networkconv.pyx +1028 -0
  85. nltkor/tag/libs/networkdependencyconv.pyx +451 -0
  86. nltkor/tag/libs/parse/__init__.py +1 -0
  87. nltkor/tag/libs/parse/__pycache__/__init__.cpython-38.pyc +0 -0
  88. nltkor/tag/libs/parse/__pycache__/__init__.cpython-39.pyc +0 -0
  89. nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-38.pyc +0 -0
  90. nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-39.pyc +0 -0
  91. nltkor/tag/libs/parse/parse_reader.py +283 -0
  92. nltkor/tag/libs/pos/__init__.py +2 -0
  93. nltkor/tag/libs/pos/__pycache__/__init__.cpython-38.pyc +0 -0
  94. nltkor/tag/libs/pos/__pycache__/__init__.cpython-39.pyc +0 -0
  95. nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-38.pyc +0 -0
  96. nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-39.pyc +0 -0
  97. nltkor/tag/libs/pos/macmorphoreader.py +7 -0
  98. nltkor/tag/libs/pos/pos_reader.py +97 -0
  99. nltkor/tag/libs/reader.py +485 -0
  100. nltkor/tag/libs/srl/__init__.py +3 -0
  101. nltkor/tag/libs/srl/__pycache__/__init__.cpython-38.pyc +0 -0
  102. nltkor/tag/libs/srl/__pycache__/__init__.cpython-39.pyc +0 -0
  103. nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-38.pyc +0 -0
  104. nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-39.pyc +0 -0
  105. nltkor/tag/libs/srl/__pycache__/train_srl.cpython-38.pyc +0 -0
  106. nltkor/tag/libs/srl/__pycache__/train_srl.cpython-39.pyc +0 -0
  107. nltkor/tag/libs/srl/__srl_reader_.py +535 -0
  108. nltkor/tag/libs/srl/srl_reader.py +436 -0
  109. nltkor/tag/libs/srl/train_srl.py +87 -0
  110. nltkor/tag/libs/taggers.py +926 -0
  111. nltkor/tag/libs/utils.py +384 -0
  112. nltkor/tag/libs/word_dictionary.py +239 -0
  113. nltkor/tag/libs/wsd/__init__.py +2 -0
  114. nltkor/tag/libs/wsd/__pycache__/__init__.cpython-38.pyc +0 -0
  115. nltkor/tag/libs/wsd/__pycache__/__init__.cpython-39.pyc +0 -0
  116. nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-38.pyc +0 -0
  117. nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-39.pyc +0 -0
  118. nltkor/tag/libs/wsd/macmorphoreader.py +7 -0
  119. nltkor/tag/libs/wsd/wsd_reader.py +93 -0
  120. nltkor/tokenize/__init__.py +62 -0
  121. nltkor/tokenize/ko_tokenize.py +115 -0
  122. nltkor/trans.py +121 -0
  123. nltkor-1.2.14.dist-info/LICENSE.txt +1093 -0
  124. nltkor-1.2.14.dist-info/METADATA +41 -0
  125. nltkor-1.2.14.dist-info/RECORD +127 -0
  126. nltkor-1.2.14.dist-info/WHEEL +5 -0
  127. nltkor-1.2.14.dist-info/top_level.txt +1 -0
nltkor/tag/libs/networkconv.pyx
@@ -0,0 +1,1028 @@
+# -*- coding: utf-8 -*-
+
+"""
+A convolutional neural network for NLP tagging tasks like SRL.
+It employs feature tables to store feature vectors for each token.
+"""
+
+import numpy as np
+cimport numpy as np
+
+cdef class ConvolutionalNetwork(Network):
+
+    # transition and distance feature tables
+    cdef public np.ndarray target_dist_table, pred_dist_table
+    cdef readonly np.ndarray target_dist_weights, pred_dist_weights
+    cdef readonly int target_dist_offset, pred_dist_offset
+    cdef readonly np.ndarray target_dist_lookup, pred_dist_lookup
+    cdef readonly np.ndarray target_convolution_lookup, pred_convolution_lookup
+    cdef readonly np.ndarray target_dist_deltas, pred_dist_deltas
+
+    # the second hidden layer
+    cdef readonly int hidden2_size
+    cdef readonly np.ndarray hidden2_weights, hidden2_bias
+    cdef readonly np.ndarray hidden2_values
+    cdef readonly np.ndarray hidden2_before_activation, hidden_before_activation
+
+    # lookup of convolution values (the same for each sentence, used to save time)
+    cdef np.ndarray convolution_lookup
+
+    # maximum convolution indices
+    cdef readonly np.ndarray max_indices
+
+    # number of targets (all tokens in a sentence or the provided arguments)
+    # and variables for argument classifying
+    cdef int num_targets
+    cdef bool only_classify
+
+    # for faster access
+    cdef int half_window
+
+    # the convolution gradients
+    cdef np.ndarray hidden_gradients, hidden2_gradients
+    cdef np.ndarray input_deltas
+
+    # keeping statistics
+    cdef int num_sentences
+
+    # validation
+    cdef list validation_predicates, validation_arguments
+
+    @classmethod
+    def create_new(cls, feature_tables, target_dist_table, pred_dist_table,
+                   int word_window, int hidden1_size, int hidden2_size, int output_size):
+        """Creates a new convolutional neural network."""
+        # sum the number of features in all tables except for distance
+        cdef int input_size = sum(table.shape[1] for table in feature_tables)
+        input_size *= word_window
+
+        dist_features_per_token = target_dist_table.shape[1] + pred_dist_table.shape[1]
+        input_size_with_distance = input_size + (word_window * dist_features_per_token)
+
+        # creates the weight matrices
+        high = 2.38 / np.sqrt(input_size_with_distance)  # [Bottou-88]
+        hidden_weights = np.random.uniform(-high, high, (hidden1_size, input_size))
+
+        num_dist_features = word_window * target_dist_table.shape[1]
+        target_dist_weights = np.random.uniform(-high, high, (num_dist_features, hidden1_size))
+        num_dist_features = word_window * pred_dist_table.shape[1]
+        pred_dist_weights = np.random.uniform(-high, high, (num_dist_features, hidden1_size))
+
+        high = 2.38 / np.sqrt(hidden1_size)
+        hidden_bias = np.random.uniform(-high, high, hidden1_size)
+
+        if hidden2_size > 0:
+            hidden2_weights = np.random.uniform(-high, high, (hidden2_size, hidden1_size))
+            high = 2.38 / np.sqrt(hidden2_size)
+            hidden2_bias = np.random.uniform(-high, high, hidden2_size)
+            output_dim = (output_size, hidden2_size)
+        else:
+            hidden2_weights = None
+            hidden2_bias = None
+            output_dim = (output_size, hidden1_size)
+
+        high = 2.38 / np.sqrt(output_dim[1])
+        output_weights = np.random.uniform(-high, high, output_dim)
+        high = 2.38 / np.sqrt(output_size)
+        output_bias = np.random.uniform(-high, high, output_size)
+
+        net = cls(word_window, input_size, hidden1_size, hidden2_size,
+                  output_size, hidden_weights, hidden_bias,
+                  target_dist_weights, pred_dist_weights,
+                  hidden2_weights, hidden2_bias,
+                  output_weights, output_bias)
+        net.feature_tables = feature_tables
+        net.target_dist_table = target_dist_table
+        net.pred_dist_table = pred_dist_table
+
+        return net
+
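Note: every layer above is initialized with uniform noise whose range shrinks with the fan-in, the 2.38 / sqrt(n) rule the comment attributes to [Bottou-88]. A minimal NumPy sketch of the same recipe, with made-up layer sizes for illustration:

import numpy as np

def init_layer(fan_in, fan_out, rng=np.random):
    # uniform in [-high, high] with high = 2.38 / sqrt(fan_in), as in create_new above
    high = 2.38 / np.sqrt(fan_in)
    weights = rng.uniform(-high, high, (fan_out, fan_in))
    bias = rng.uniform(-2.38 / np.sqrt(fan_out), 2.38 / np.sqrt(fan_out), fan_out)
    return weights, bias

w, b = init_layer(fan_in=300, fan_out=200)   # e.g. input -> convolution layer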
+    def description(self):
+        """Returns a textual description of the network."""
+        hidden2_size = 0 if self.hidden2_weights is None else self.hidden2_size
+        table_dims = [str(t.shape[1]) for t in self.feature_tables]
+        table_dims = ', '.join(table_dims)
+
+        dist_table_dims = '%d, %d' % (self.target_dist_table.shape[1], self.pred_dist_table.shape[1])
+
+        desc = """
+        Word window size: %d
+        Feature table sizes: %s
+        Distance table sizes (target and predicate): %s
+        Input layer size: %d
+        Convolution layer size: %d
+        Second hidden layer size: %d
+        Output size: %d
+        """ % (self.word_window_size, table_dims, dist_table_dims, self.input_size, self.hidden_size,
+               hidden2_size, self.output_size)
+
+        return desc
+
+
+    def __init__(self, word_window, input_size, hidden1_size, hidden2_size,
+                 output_size, hidden1_weights, hidden1_bias, target_dist_weights,
+                 pred_dist_weights, hidden2_weights, hidden2_bias,
+                 output_weights, output_bias):
+        super(ConvolutionalNetwork, self).__init__(word_window, input_size,
+                                                   hidden1_size, output_size,
+                                                   hidden1_weights, hidden1_bias,
+                                                   output_weights, output_bias)
+        # integer division (half_window is declared as a C int)
+        self.half_window = word_window // 2
+        self.features_per_token = self.input_size // word_window
+
+        self.transitions = None
+        self.target_dist_lookup = None
+        self.pred_dist_lookup = None
+        self.target_dist_weights = target_dist_weights
+        self.pred_dist_weights = pred_dist_weights
+
+        self.hidden2_size = hidden2_size
+        self.hidden2_weights = hidden2_weights
+        self.hidden2_bias = hidden2_bias
+
+        self.validation_predicates = None
+        self.validation_arguments = None
+
+        self.use_learning_rate_decay = False
+
+    def _generate_save_dict(self):
+        """
+        Generates a dictionary with all parameters saved by the model.
+        It is directly used by the numpy savez function.
+        """
+        d = dict(hidden_weights=self.hidden_weights,
+                 target_dist_table=self.target_dist_table,
+                 pred_dist_table=self.pred_dist_table,
+                 target_dist_weights=self.target_dist_weights,
+                 pred_dist_weights=self.pred_dist_weights,
+                 output_weights=self.output_weights,
+                 transitions=self.transitions,
+                 hidden_bias=self.hidden_bias, output_bias=self.output_bias,
+                 word_window_size=self.word_window_size,
+                 input_size=self.input_size, hidden_size=self.hidden_size,
+                 output_size=self.output_size, hidden2_size=self.hidden2_size,
+                 hidden2_weights=self.hidden2_weights, hidden2_bias=self.hidden2_bias,
+                 padding_left=self.padding_left, padding_right=self.padding_right,
+                 feature_tables=self.feature_tables)
+        return d
+
+    def save(self):
+        """
+        Saves the neural network to a file.
+        It will save the weights, biases, sizes, padding and
+        distance tables, and other feature tables.
+        """
+        data = self._generate_save_dict()
+        np.savez(self.network_filename, **data)
+
+    @classmethod
+    def _load_from_file(cls, data, filename):
+        """
+        Internal method for setting data read from an npz file.
+        """
+        # cython classes don't have the __dict__ attribute
+        # so we can't do an elegant self.__dict__.update(data)
+        hidden_weights = data['hidden_weights']
+        hidden_bias = data['hidden_bias']
+        hidden2_weights = data['hidden2_weights']
+
+        hidden2_bias = data['hidden2_bias']
+        output_weights = data['output_weights']
+        output_bias = data['output_bias']
+
+        word_window_size = data['word_window_size']
+        input_size = data['input_size']
+        hidden_size = data['hidden_size']
+        hidden2_size = data['hidden2_size']
+        output_size = data['output_size']
+
+        # numpy stores None as an array containing None and with empty shape
+        if hidden2_weights.shape == ():
+            hidden2_weights = None
+            hidden2_size = 0
+            hidden2_bias = None
+
+        nn = cls(word_window_size, input_size, hidden_size, hidden2_size,
+                 output_size, hidden_weights, hidden_bias,
+                 data['target_dist_weights'], data['pred_dist_weights'],
+                 hidden2_weights, hidden2_bias,
+                 output_weights, output_bias)
+
+        nn.target_dist_table = data['target_dist_table']
+        nn.pred_dist_table = data['pred_dist_table']
+        #transitions = data['transitions']
+        #nn.transitions = transitions if transitions.shape != () else None
+        nn.padding_left = data['padding_left']
+        nn.padding_right = data['padding_right']
+        nn.pre_padding = np.array(int(nn.word_window_size // 2) * [nn.padding_left])
+        nn.pos_padding = np.array(int(nn.word_window_size // 2) * [nn.padding_right])
+        nn.feature_tables = list(data['feature_tables'])
+        nn.network_filename = filename
+
+        return nn
+
+    @classmethod
+    def load_from_file(cls, filename):
+        """
+        Loads the neural network from a file.
+        It will load weights, biases, sizes, padding and
+        distance tables, and other feature tables.
+        """
+        data = np.load(filename, allow_pickle=True)  # modified: object arrays need allow_pickle
+        return cls._load_from_file(data, filename)
+
+    def _load_parameters(self):
+        """
+        Loads weights, feature tables, distance tables and
+        transition tables previously saved.
+        """
+        # allow_pickle is needed here as well, as in load_from_file above
+        data = np.load(self.network_filename, allow_pickle=True)
+        self.hidden_weights = data['hidden_weights']
+        self.hidden_bias = data['hidden_bias']
+        self.output_weights = data['output_weights']
+        self.output_bias = data['output_bias']
+        self.feature_tables = list(data['feature_tables'])
+        self.target_dist_table = data['target_dist_table']
+        self.pred_dist_table = data['pred_dist_table']
+
+        # check if transitions isn't None (numpy saves everything as an array)
+        if data['transitions'].shape != ():
+            self.transitions = data['transitions']
+        else:
+            self.transitions = None
+
+        # same for second hidden layer weights
+        if data['hidden2_weights'].shape != ():
+            self.hidden2_weights = data['hidden2_weights']
+            self.hidden2_bias = data['hidden2_bias']
+        else:
+            self.hidden2_weights = None
+
+    def set_validation_data(self, list validation_sentences,
+                            list validation_predicates,
+                            list validation_tags,
+                            list validation_arguments=None):
+        """
+        Sets the data to be used in validation during training. If this function
+        is not called before training, the training data itself is used to
+        measure the model's performance.
+        """
+        self.validation_sentences = validation_sentences
+        self.validation_predicates = validation_predicates
+        self.validation_tags = validation_tags
+        self.validation_arguments = validation_arguments
+
+    def train(self, list sentences, list predicates, list tags,
+              int epochs, int epochs_between_reports=0,
+              float desired_accuracy=0, list arguments=None):
+        """
+        Trains the convolutional network. Refer to the basic Network
+        train method for detailed explanation.
+
+        :param predicates: a list of 1-dim numpy arrays
+            indicating the indices of predicates in each sentence.
+        :param arguments: (only for argument classifying) a list of 2-dim
+            numpy arrays indicating the start and end of each argument.
+        """
+        self.num_sentences = len(sentences)
+        self.num_tokens = sum(len(sent) for sent in sentences)
+        self.only_classify = arguments is not None
+
+        logger = logging.getLogger("Logger")
+        logger.info("Training for up to %d epochs" % epochs)
+        last_accuracy = 0
+        top_accuracy = 0
+        last_error = np.inf
+
+        if self.validation_sentences is None:
+            self.set_validation_data(sentences, predicates, tags, arguments)
+
+        for i in range(epochs):
+            self.decrease_learning_rates(i)
+            self._train_epoch(sentences, predicates, tags, arguments)
+            self._validate()
+
+            # Attardi: save model
+            if self.accuracy > top_accuracy:
+                top_accuracy = self.accuracy
+                self.save()
+                logger.debug("Saved model")
+            elif self.use_learning_rate_decay:
+                # this iteration didn't bring improvements; load the last saved model
+                # before continuing training with a lower rate
+                self._load_parameters()
+
+            if (epochs_between_reports > 0 and i % epochs_between_reports == 0) \
+                    or self.accuracy >= desired_accuracy > 0 \
+                    or (self.accuracy < last_accuracy and self.error > last_error):
+
+                self._print_epoch_report(i + 1)
+
+                if self.accuracy >= desired_accuracy > 0 \
+                        or (self.accuracy < last_accuracy and self.error > last_error):
+                    # accuracy is falling, the network is probably diverging
+                    # or overfitting
+                    break
+
+            last_accuracy = self.accuracy
+            last_error = self.error
+
+        self.num_sentences = 0
+        self.num_tokens = 0
+        self._reset_counters()
+
+    def _reset_counters(self):
+        """
+        Reset the performance statistics counters. They are updated during
+        each epoch.
+        """
+        self.error = 0
+        self.skips = 0
+        self.float_errors = 0
+
+    def _shuffle_data(self, sentences, predicates, tags, arguments=None):
+        """
+        Shuffle the given training data in place.
+        """
+        # get the random number generator state in order to shuffle
+        # sentences and their tags in the same order
+        random_state = np.random.get_state()
+        np.random.shuffle(sentences)
+        np.random.set_state(random_state)
+        np.random.shuffle(predicates)
+        np.random.set_state(random_state)
+        np.random.shuffle(tags)
+        if arguments is not None:
+            np.random.set_state(random_state)
+            np.random.shuffle(arguments)
+
+
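Note: the in-place parallel shuffle above works by snapshotting NumPy's RNG state and restoring it before each shuffle, so every list receives the same permutation. A self-contained sketch of the same idea:

import numpy as np

sentences = ['s0', 's1', 's2', 's3']
tags = ['t0', 't1', 't2', 't3']

state = np.random.get_state()
np.random.shuffle(sentences)
np.random.set_state(state)   # replay the same permutation
np.random.shuffle(tags)

assert [s[1] for s in sentences] == [t[1] for t in tags]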
+    def _train_epoch(self, sentences, predicates, tags, arguments):
+        """Trains for one epoch with all examples."""
+
+        self._reset_counters()
+        self._shuffle_data(sentences, predicates, tags, arguments)
+        if arguments is not None:
+            i_args = iter(arguments)
+        else:
+            sent_args = None
+
+        for sent, sent_preds, sent_tags in zip(sentences, predicates, tags):
+            if arguments is not None:
+                sent_args = next(i_args)
+                #sent_args = i_args.next()
+
+            try:
+                self._tag_sentence(sent, sent_preds, sent_tags, sent_args)
+            except FloatingPointError:
+                # just ignore the sentence in case of an overflow
+                self.float_errors += 1
+
+    def tag_sentence(self, np.ndarray sentence, np.ndarray predicates,
+                     list arguments=None, bool logprob=False,
+                     bool allow_repeats=True):
+        """
+        Runs the network for each element in the sentence and returns
+        the sequence of tags.
+
+        :param sentence: a 2-dim numpy array, where each item encodes a token.
+        :param predicates: a 1-dim numpy array, indicating the position
+            of the predicates in the sentence
+        :param logprob: a boolean indicating whether to return the
+            log-probability for each answer or not.
+        :param allow_repeats: a boolean indicating whether to allow repeated
+            argument classes (only for separate argument classification).
+        """
+        self.only_classify = arguments is not None
+        return self._tag_sentence(sentence, predicates, argument_blocks=arguments,
+                                  logprob=logprob, allow_repeats=allow_repeats)
+
+    cdef np.ndarray argument_distances(self, positions, argument):
+        """
+        Calculates the distance from each token in the sentence to the argument.
+        """
+        distances = positions.copy()
+
+        # the ones before the argument
+        #print(argument)
+        lo = np.less(positions, argument[0])
+        distances[lo] -= argument[0]
+
+        # the ones after the argument
+        hi = np.greater(positions, argument[1])
+        distances[hi] -= argument[1]
+
+        # the ones inside the argument
+        distances[np.logical_not(hi | lo)] = 0
+
+        return distances
+
+
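Note: a small worked example of the distance convention used by argument_distances: tokens before the argument get negative distances to its start, tokens after it get positive distances to its end, and tokens inside it get 0. A plain NumPy sketch of the same logic (not the cdef method itself):

import numpy as np

def argument_distances(positions, argument):
    distances = positions.copy()
    lo = np.less(positions, argument[0])     # before the argument
    distances[lo] -= argument[0]
    hi = np.greater(positions, argument[1])  # after the argument
    distances[hi] -= argument[1]
    distances[np.logical_not(hi | lo)] = 0   # inside the argument
    return distances

print(argument_distances(np.arange(7), (2, 4)))
# [-2 -1  0  0  0  1  2]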
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _sentence_convolution(self, sentence, predicate, argument_blocks=None,
+                              training=False):
+        """
+        Perform the convolution for a given predicate.
+
+        :param sentence: a sequence of tokens, each represented as an array of
+            indices
+        :param predicate: the index of the predicate in the sentence
+        :param argument_blocks: (used only in SRL argument classification) the
+            starting and end positions of all delimited arguments
+        :return: the scores for all tokens with respect to the given predicate
+        """
+        # store the values found by each convolution neuron here and then find the max
+        cdef np.ndarray[FLOAT_t, ndim=2] convolution_values
+
+        # a priori scores for all tokens
+        cdef np.ndarray[FLOAT_t, ndim=2] scores
+
+        # intermediate storage
+        cdef np.ndarray[FLOAT_t, ndim=2] input_and_pred_dist_values
+
+        self.num_targets = len(sentence) if argument_blocks is None else len(argument_blocks)
+
+        # maximum values found by convolution
+        self.hidden_values = np.zeros((self.num_targets, self.hidden_size))
+
+        if training:
+            # hidden sent values: results after tanh
+            self.hidden_values = np.zeros((self.num_targets, self.hidden_size))
+            self.max_indices = np.empty((self.num_targets, self.hidden_size), int)
+            #self.max_indices = np.empty((self.num_targets, self.hidden_size), np.int)
+
+        # predicate distances are the same across all targets
+        pred_dist_indices = np.arange(len(sentence)) - predicate
+        pred_dist_values = self.pred_convolution_lookup.take(pred_dist_indices + self.pred_dist_offset,
+                                                             0, mode='clip')
+
+        input_and_pred_dist_values = pred_dist_values + self.convolution_lookup
+
+        for target in range(self.num_targets):
+            # loop over targets and add the weighted distance features to each token
+            # this is necessary for the convolution layer
+
+            # distance features for each window
+            # if we are classifying all tokens, pick the distance to the target
+            # if we are classifying arguments, pick the distance to the closest boundary
+            # of the argument (beginning or end)
+            if argument_blocks is None:
+                target_dist_indices = np.arange(len(sentence)) - target
+            else:
+                argument = argument_blocks[target]
+                #print(argument_blocks)
+                target_dist_indices = self.argument_distances(np.arange(len(sentence)), argument)
+
+            target_dist_values = self.target_convolution_lookup.take(target_dist_indices + self.target_dist_offset,
+                                                                     0, mode='clip')
+
+            convolution_values = target_dist_values + input_and_pred_dist_values
+
+            # now, find the maximum values
+            if training:
+                self.max_indices[target] = convolution_values.argmax(0)
+            self.hidden_values[target] = convolution_values.max(0)
+
+        # apply the bias and proceed to the next layer
+        self.hidden_values += self.hidden_bias
+
+        if self.hidden2_weights is not None:
+            self.hidden2_values = self.hidden_values.dot(self.hidden2_weights.T) + self.hidden2_bias
+
+            if training:
+                self.hidden2_before_activation = self.hidden2_values.copy()
+
+            hardtanh(self.hidden2_values, inplace=True)
+        else:
+            # apply non-linearity here
+            if training:
+                self.hidden_before_activation = self.hidden_values.copy()
+
+            self.hidden2_values = self.hidden_values
+            hardtanh(self.hidden_values, inplace=True)
+
+        scores = self.hidden2_values.dot(self.output_weights.T) + self.output_bias
+
+        return scores
+
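Note: hardtanh and hardtanhd are not defined in this file; they presumably come from the network.pyx sources this .pyx is compiled together with. For reference, the usual piecewise-linear definition, as a NumPy sketch:

import numpy as np

def hardtanh(x, inplace=False):
    # clamp to [-1, 1]; the function is linear inside the interval
    out = x if inplace else x.copy()
    np.clip(out, -1.0, 1.0, out=out)
    return out

def hardtanhd(x):
    # derivative of hardtanh, evaluated on pre-activation values: 1 inside (-1, 1), 0 outside
    return ((x > -1.0) & (x < 1.0)).astype(x.dtype)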
+    def _pre_tagging_setup(self, np.ndarray sentence, bool training):
+        """
+        Perform some initialization actions before the actual tagging.
+        """
+        if training:
+            # this table will store the values of the neurons for each input token
+            # they will be needed during weight adjustments
+            self.input_sent_values = np.empty((len(sentence), self.input_size))
+
+        # store the convolution values to save time
+        self._create_convolution_lookup(sentence, training)
+
+        if self.target_dist_lookup is None: self._create_target_lookup()
+        if self.pred_dist_lookup is None: self._create_pred_lookup()
+
+
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _tag_sentence(self, np.ndarray sentence, np.ndarray predicates,
+                      list tags=None, list argument_blocks=None,
+                      bool allow_repeats=True, bool logprob=False):
+        """
+        Runs the network for every predicate in the sentence.
+        Refer to the Network class for more information.
+
+        :param tags: this is a list rather than a numpy array because in
+            argument classification, each predicate may have a different number
+            of arguments.
+        :param argument_blocks: (used only in SRL argument classification) a list
+            with the starting and end positions of all delimited arguments (one for
+            each predicate)
+        :param predicates: a numpy array with the indices of the predicates in the sentence.
+        """
+        answer = []
+        training = tags is not None
+        self._pre_tagging_setup(sentence, training)
+        cdef np.ndarray[FLOAT_t, ndim=2] token_scores
+
+        for i, predicate in enumerate(predicates):
+            # only index argument_blocks when classifying pre-delimited arguments;
+            # it is None otherwise
+            pred_arguments = None if not self.only_classify else argument_blocks[i]
+
+            token_scores = self._sentence_convolution(sentence, predicate, pred_arguments, training)
+            pred_answer = self._viterbi(token_scores, allow_repeats)
+
+            if training:
+                pred_tags = tags[i]
+                if self._calculate_gradients(pred_tags, token_scores):
+                    self._backpropagate()
+                    self._calculate_input_deltas(sentence, predicate, pred_arguments)
+                    self._adjust_weights(predicate, pred_arguments)
+                    self._adjust_features(sentence, predicate)
+
+            if logprob:
+                if self.only_classify:
+                    raise NotImplementedError('Confidence measure not implemented for argument classifying')
+
+                all_scores = self._calculate_all_scores(token_scores)
+                last_token = len(sentence) - 1
+                logadd = np.log(np.sum(np.exp(all_scores[last_token])))
+                confidence = self.answer_score - logadd
+                pred_answer = (pred_answer, confidence)
+
+            answer.append(pred_answer)
+
+        return answer
+
+    def _validate(self):
+        """
+        Evaluates the network performance, updating its hits count.
+        """
+        # call it "item" instead of token because the same token may be counted
+        # more than once (sentences with multiple predicates)
+        num_items = 0
+        hits = 0
+
+        if self.validation_arguments is not None:
+            i_args = iter(self.validation_arguments)
+        else:
+            sent_args = None
+
+        for sent, sent_preds, sent_tags in zip(self.validation_sentences,
+                                               self.validation_predicates,
+                                               self.validation_tags):
+            if self.validation_arguments is not None:
+                sent_args = next(i_args)
+                #sent_args = i_args.next()
+
+            answer = self._tag_sentence(sent, sent_preds, None, sent_args)
+            for predicate_answer, predicate_tags in zip(answer, sent_tags):
+                for net_tag, gold_tag in zip(predicate_answer, predicate_tags):
+                    if net_tag == gold_tag:
+                        hits += 1
+
+                num_items += len(predicate_answer)
+
+        self.accuracy = float(hits) / num_items
+        # normalize error
+        self.error /= num_items
+
+    def _calculate_gradients(self, tags, scores):
+        """Delegates the call to the appropriate function."""
+        if self.only_classify:
+            return self._calculate_gradients_classify(tags, scores)
+        else:
+            return self._calculate_gradients_sll(tags, scores)
+
+    def _calculate_gradients_classify(self, tags, scores):
+        """
+        Calculates the output deltas for each target in a network that only
+        classifies predelimited arguments.
+        The aim is to minimize the cost, for each argument:
+        logadd(score for all possible tags) - score(correct tag)
+
+        :returns: whether a correction is necessary or not.
+        """
+        self.net_gradients = np.zeros_like(scores, float)
+        #self.net_gradients = np.zeros_like(scores, np.float)
+        correction = False
+
+        for i, tag_scores in enumerate(scores):
+            tag = tags[i]
+
+            exponentials = np.exp(tag_scores)
+            exp_sum = np.sum(exponentials)
+            logadd = np.log(exp_sum)
+
+            # update the total error
+            error = logadd - tag_scores[tag]
+            self.error += error
+
+            # like the non-convolutional network, don't adjust weights if the error
+            # is too low. An error of 0.01 means a log-prob of -0.01 for the right
+            # tag, i.e., more than 99% probability
+            if error <= 0.01:
+                self.skips += 1
+                continue
+
+            correction = True
+            self.net_gradients[i] = - exponentials / exp_sum
+            self.net_gradients[i, tag] += 1
+
+        return correction
+
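Note: the update above is the usual softmax cross-entropy gradient: minimizing logadd(scores) - scores[gold] gives d/dscores = one_hot(gold) - softmax(scores), which is exactly what net_gradients stores. A quick check in plain NumPy (toy scores and a hypothetical gold tag):

import numpy as np

scores = np.array([1.0, 2.0, 0.5])
gold = 1

exp = np.exp(scores)
softmax = exp / exp.sum()

grad = -softmax
grad[gold] += 1          # same update as net_gradients[i] above
loss = np.log(exp.sum()) - scores[gold]
print(loss, grad)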
+    def _backpropagate(self):
+        """Backpropagates the error gradient."""
+
+        # this function only determines the gradients at each layer, without
+        # adjusting weights. This is done because the input features must
+        # be adjusted with the first weight matrix unchanged.
+
+        # gradient[i][j] has the gradient for token i at neuron j
+
+        # derivative with respect to the non-linearity layer (tanh)
+        dCd_tanh = self.net_gradients.dot(self.output_weights)
+
+        if self.hidden2_weights is not None:
+            # derivative with respect to the second hidden layer
+            dCd_hidden2 = dCd_tanh * hardtanhd(self.hidden2_before_activation)
+            self.hidden2_gradients = dCd_hidden2
+
+            self.hidden_gradients = self.hidden2_gradients.dot(self.hidden2_weights)
+        else:
+            # the non-linearity appears right after the convolution max
+            self.hidden_gradients = dCd_tanh * hardtanhd(self.hidden_before_activation)
+
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _adjust_weights(self, predicate, arguments=None):
+        """
+        Adjusts the network weights after gradients have been calculated.
+        """
+        cdef int i
+        cdef np.ndarray[FLOAT_t, ndim=1] gradients_t
+        cdef np.ndarray[FLOAT_t, ndim=2] last_values, deltas, grad_matrix, input_values
+
+        last_values = self.hidden2_values if self.hidden2_weights is not None else self.hidden_values
+        deltas = self.net_gradients.T.dot(last_values) * self.learning_rate
+        self.output_weights += deltas
+        self.output_bias += self.net_gradients.sum(0) * self.learning_rate
+
+        if self.hidden2_weights is not None:
+            deltas = self.hidden2_gradients.T.dot(self.hidden_values) * self.learning_rate
+            self.hidden2_weights += deltas
+            self.hidden2_bias += self.hidden2_gradients.sum(0) * self.learning_rate
+
+        # now adjust weights from input to convolution. these will be trickier.
+        # we need to know which input value to use in the delta formula
+
+        # I tried vectorizing this loop but it got a bit slower, probably because
+        # of the overhead in building matrices/tensors with the max indices
+        for i, neuron_maxes in enumerate(self.max_indices):
+            # i indicates the i-th target
+
+            gradients_t = self.hidden_gradients[i] * self.learning_rate
+
+            # table containing in each line the input values selected for each convolution neuron
+            input_values = self.input_sent_values.take(neuron_maxes, 0)
+
+            # stack the gradients to multiply all weights for a neuron
+            grad_matrix = np.tile(gradients_t, [self.input_size, 1]).T
+            self.hidden_weights += grad_matrix * input_values
+
+            # target distance weights
+            # get the relative distance from each max token to its target
+            if arguments is None:
+                target_dists = neuron_maxes - i
+            else:
+                argument = arguments[i]
+                target_dists = self.argument_distances(neuron_maxes, argument)
+
+            dist_features = self.target_dist_lookup.take(target_dists + self.target_dist_offset,
+                                                         0, mode='clip')
+            grad_matrix = np.tile(gradients_t, [self.target_dist_weights.shape[0], 1]).T
+            self.target_dist_weights += (grad_matrix * dist_features).T
+
+            # predicate distance weights
+            # get the distance from each max token to its predicate
+            pred_dists = neuron_maxes - predicate
+            dist_features = self.pred_dist_lookup.take(pred_dists + self.pred_dist_offset,
+                                                       0, mode='clip')
+            # try to recycle the grad_matrix if sizes match
+            if self.target_dist_weights.shape[0] != self.pred_dist_weights.shape[0]:
+                grad_matrix = np.tile(gradients_t, [self.pred_dist_weights.shape[0], 1]).T
+
+            self.pred_dist_weights += (grad_matrix * dist_features).T
+
+        self.hidden_bias += self.hidden_gradients.sum(0) * self.learning_rate
+
+        # Adjusts the transition scores table with the calculated gradients.
+        if not self.only_classify and self.transitions is not None:
+            self.transitions += self.trans_gradients * self.learning_rate_trans
+
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _calculate_input_deltas(self, np.ndarray sentence, int predicate,
+                                object arguments=None):
+        """
+        Calculates the input deltas to be applied in the feature tables.
+        """
+        cdef np.ndarray[FLOAT_t, ndim=2] hidden_gradients, input_gradients
+        cdef np.ndarray[FLOAT_t, ndim=2] target_dist_gradients, pred_dist_gradients
+        cdef np.ndarray[FLOAT_t, ndim=1] gradients
+        cdef np.ndarray[INT_t, ndim=1] convolution_max, target_dists
+
+        # matrices accumulating gradients over each target
+        # each matrix has a whole window in each line
+        input_gradients = np.zeros((len(sentence), self.hidden_size))
+        target_dist_gradients = np.zeros((self.target_dist_lookup.shape[0], self.hidden_size))
+        pred_dist_gradients = np.zeros((self.pred_dist_lookup.shape[0], self.hidden_size))
+
+        # avoid multiplying by the learning rate multiple times
+        hidden_gradients = self.hidden_gradients * self.learning_rate_features
+        cdef np.ndarray[INT_t, ndim=1] column_numbers = np.arange(self.hidden_size)
+
+        for target in range(self.num_targets):
+
+            # array with the tokens that yielded the maximum value in each neuron
+            # for this target
+            convolution_max = self.max_indices[target]
+
+            if not self.only_classify:
+                target_dists = convolution_max - target
+            else:
+                argument = arguments[target]
+                target_dists = self.argument_distances(convolution_max, argument)
+
+            target_dists = np.clip(target_dists + self.target_dist_offset, 0,
+                                   self.target_dist_lookup.shape[0] - 1)
+            pred_dists = convolution_max - predicate
+            pred_dists = np.clip(pred_dists + self.pred_dist_offset, 0,
+                                 self.pred_dist_lookup.shape[0] - 1)
+
+            gradients = hidden_gradients[target]
+
+            # sparse matrix with gradients to be applied over the input
+            # line i has the gradients for the i-th token in the sentence
+            input_gradients[convolution_max, np.arange(self.hidden_size)] += gradients
+
+            # distance deltas
+            target_dist_gradients[target_dists, np.arange(self.hidden_size)] += gradients
+            pred_dist_gradients[pred_dists, np.arange(self.hidden_size)] += gradients
+
+        self.input_deltas = input_gradients.dot(self.hidden_weights)
+        self.target_dist_deltas = target_dist_gradients.dot(self.target_dist_weights.T)
+        self.pred_dist_deltas = pred_dist_gradients.dot(self.pred_dist_weights.T)
+
+
+    def _adjust_features(self, sentence, predicate):
+        """Adjusts the features in all feature tables."""
+        # compute each token in the window separately and
+        # separate the feature deltas into tables
+        start_from = 0
+        dist_target_from = 0
+        dist_pred_from = 0
+
+        # number of times that the minimum and maximum distances are repeated
+        # in the lookup distance tables
+        pre_dist = self.word_window_size
+        pos_dist = 1
+        if self.word_window_size > 1:
+            padded_sentence = np.concatenate((self.pre_padding,
+                                              sentence,
+                                              self.pos_padding))
+        else:
+            padded_sentence = sentence
+
+        for i in range(self.word_window_size):
+
+            for j, table in enumerate(self.feature_tables):
+                # this is the column for the i-th position in the window
+                # regarding features from the j-th table
+                table_deltas = self.input_deltas[:, start_from:start_from + table.shape[1]]
+                start_from += table.shape[1]
+
+                for token, deltas in zip(padded_sentence[i:], table_deltas):
+                    table[token[j]] += deltas
+
+            dist_deltas = self.target_dist_deltas[:, dist_target_from : dist_target_from + self.target_dist_table.shape[1]]
+            pre_deltas = dist_deltas.take(np.arange(pre_dist), 0).sum(0)
+            pos_deltas = dist_deltas.take(np.arange(-pos_dist, 0), 0).sum(0)
+            self.target_dist_table[1:-1, :] += dist_deltas[pre_dist : -pos_dist]
+            self.target_dist_table[0] += pre_deltas
+            self.target_dist_table[-1] += pos_deltas
+            dist_target_from += self.target_dist_table.shape[1]
+
+            dist_deltas = self.pred_dist_deltas[:, dist_pred_from : dist_pred_from + self.pred_dist_table.shape[1]]
+            pre_deltas = dist_deltas.take(np.arange(pre_dist), 0).sum(0)
+            pos_deltas = dist_deltas.take(np.arange(-pos_dist, 0), 0).sum(0)
+            self.pred_dist_table[1:-1, :] += dist_deltas[pre_dist : -pos_dist]
+            self.pred_dist_table[0] += pre_deltas
+            self.pred_dist_table[-1] += pos_deltas
+
+            pre_dist -= 1
+            pos_dist += 1
+            dist_pred_from += self.pred_dist_table.shape[1]
+
+        self._create_target_lookup()
+        self._create_pred_lookup()
+
+    @cython.boundscheck(False)
+    def _viterbi(self, np.ndarray[FLOAT_t, ndim=2] scores, bool allow_repeats=True):
+        """
+        Performs a Viterbi search over the scores for each tag using
+        the transitions matrix. If a matrix wasn't supplied,
+        it will return the tags with the highest scores individually.
+        """
+        if self.transitions is None:
+            best_scores = scores.argmax(1)
+
+            if allow_repeats:
+                return best_scores
+
+            # we must find the combination of tags that maximizes the probabilities
+            logadd = np.log(np.sum(np.exp(scores), 1))
+            logprobs = (scores.T - logadd).T
+            counts = np.bincount(best_scores)
+
+            while counts.max() != 1:
+                # find the tag with the most conflicting args
+                conflicting_tag = counts.argmax()
+
+                # arguments with that tag as current maximum
+                args = np.where(best_scores == conflicting_tag)[0]
+
+                # get the logprobs for those args having this tag
+                conflicting_probs = logprobs[args, conflicting_tag]
+
+                # find the argument with the highest probability for that tag
+                highest_prob_arg = args[conflicting_probs.argmax()]
+
+                # set the score for other arguments in that tag to a low value
+                other_args = args[args != highest_prob_arg]
+                scores[other_args, conflicting_tag] = -1000
+
+                # and find the new maxes, without recalculating probabilities
+                best_scores = scores.argmax(1)
+                counts = np.bincount(best_scores)
+
+            return best_scores
+
+        path_scores = np.empty_like(scores)
+        path_backtrack = np.empty_like(scores, int)
+        #path_backtrack = np.empty_like(scores, np.int)
+
+        # now the actual Viterbi algorithm
+        # first, get the scores for each tag at token 0
+        # the last row of the transitions table has the scores for the first tag
+        path_scores[0] = scores[0] + self.transitions[-1]
+
+        for i, token in enumerate(scores[1:], 1):
+
+            # each line contains the score until each tag t plus the transition to each other tag t'
+            prev_score_and_trans = (path_scores[i - 1] + self.transitions[:-1].T).T
+
+            # find the previous tag that yielded the max score
+            path_backtrack[i] = prev_score_and_trans.argmax(0)
+            path_scores[i] = prev_score_and_trans[path_backtrack[i],
+                                                  np.arange(self.output_size)] + scores[i]
+
+        # now find the maximum score for the last token and follow the backtrack
+        answer = np.empty(len(scores), dtype=int)
+        #answer = np.empty(len(scores), dtype=np.int)
+        answer[-1] = path_scores[-1].argmax()
+        self.answer_score = path_scores[-1][answer[-1]]
+        previous_tag = path_backtrack[-1][answer[-1]]
+
+        for i in range(len(scores) - 2, 0, -1):
+            answer[i] = previous_tag
+            previous_tag = path_backtrack[i][previous_tag]
+
+        answer[0] = previous_tag
+        return answer
+
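Note: the decoder above expects a transitions matrix with one extra row: transitions[-1] holds the scores for starting in each tag, and transitions[t, t'] the score of moving from tag t to tag t'. A compact NumPy version of the same recurrence, independent of the class and with toy sizes, for comparison:

import numpy as np

def viterbi(scores, transitions):
    # scores: (num_tokens, num_tags); transitions: (num_tags + 1, num_tags)
    n, t = scores.shape
    path_scores = np.empty_like(scores)
    backtrack = np.empty((n, t), dtype=int)
    path_scores[0] = scores[0] + transitions[-1]
    for i in range(1, n):
        prev = path_scores[i - 1][:, None] + transitions[:-1]   # [t, t'] pairs
        backtrack[i] = prev.argmax(0)
        path_scores[i] = prev[backtrack[i], np.arange(t)] + scores[i]
    answer = np.empty(n, dtype=int)
    answer[-1] = path_scores[-1].argmax()
    for i in range(n - 2, -1, -1):
        answer[i] = backtrack[i + 1][answer[i + 1]]
    return answer

print(viterbi(np.random.randn(5, 3), np.random.randn(4, 3)))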
+    def _create_target_lookup(self):
+        """
+        Creates a lookup table with the window value for each different distance
+        to the target token (target_dist_lookup) and one with the precomputed
+        values in the convolution layer (target_convolution_lookup).
+        """
+        # consider padding. if the table has 10 entries, with a word window of 3,
+        # we would have to consider up to the distance of 11, because of the padding.
+        num_distances = self.target_dist_table.shape[0] + self.word_window_size - 1
+        self.target_dist_lookup = np.empty((num_distances,
+                                            self.word_window_size * self.target_dist_table.shape[1]))
+        self.target_dist_offset = num_distances // 2
+        window_from = 0
+        window_to = self.target_dist_table.shape[1]
+        for i in range(self.word_window_size):
+            # each token in the window is shifted in relation to the middle one
+            shift = i - self.half_window
+
+            # discount half window size because of the extra distances we added for padding
+            inds = np.arange(shift, num_distances + shift) - self.half_window
+            inds = np.clip(inds, 0, self.target_dist_table.shape[0] - 1)
+            self.target_dist_lookup[:, window_from : window_to] = self.target_dist_table[inds,]
+
+            window_from = window_to
+            window_to += self.target_dist_table.shape[1]
+
+        self.target_convolution_lookup = self.target_dist_lookup.dot(self.target_dist_weights)
+
+
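Note: distances that fall outside the distance table are handled by indexing with mode='clip', so anything beyond the first or last bucket reuses that bucket. A tiny illustration of the indexing trick used throughout this file, with made-up table sizes:

import numpy as np

dist_table = np.arange(5 * 2).reshape(5, 2)   # 5 distance buckets, 2 features each
offset = 5 // 2                               # centre bucket corresponds to distance 0

raw_distances = np.array([-4, -1, 0, 2, 7])   # some are out of range
rows = dist_table.take(raw_distances + offset, 0, mode='clip')
print(rows)   # -4 clips to bucket 0, +7 clips to bucket 4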
+    def _create_pred_lookup(self):
+        """
+        Creates a lookup table with the window value for each different distance
+        to the predicate token (pred_dist_lookup) and one with the precomputed
+        values in the convolution layer (pred_convolution_lookup).
+        """
+        # consider padding. if the table has 10 entries, with a word window of 3,
+        # we would have to consider up to the distance of 11, because of the padding.
+        num_distances = self.pred_dist_table.shape[0] + self.word_window_size - 1
+        self.pred_dist_lookup = np.empty((num_distances,
+                                          self.word_window_size * self.pred_dist_table.shape[1]))
+        self.pred_dist_offset = num_distances // 2
+        window_from = 0
+        window_to = self.pred_dist_table.shape[1]
+        for i in range(self.word_window_size):
+            # each token in the window is shifted in relation to the middle one
+            shift = i - self.half_window
+
+            # discount half window size because of the extra distances we added for padding
+            inds = np.arange(shift, num_distances + shift) - self.half_window
+            inds = np.clip(inds, 0, self.pred_dist_table.shape[0] - 1)
+            self.pred_dist_lookup[:, window_from : window_to] = self.pred_dist_table[inds,]
+
+            window_from = window_to
+            window_to += self.pred_dist_table.shape[1]
+
+        self.pred_convolution_lookup = self.pred_dist_lookup.dot(self.pred_dist_weights)
+
+    def _create_convolution_lookup(self, sentence, training):
+        """
+        Creates a lookup table storing the values found by each
+        convolutional neuron before summing distance features.
+        The table has the format len(sent) x len(convol layer)
+        Biases are not included.
+        """
+        cdef np.ndarray padded_sentence
+
+        # add padding to the sentence
+        if self.word_window_size > 1:
+            padded_sentence = np.vstack((self.pre_padding,
+                                         sentence,
+                                         self.pos_padding))
+        else:
+            padded_sentence = sentence
+
+        self.convolution_lookup = np.empty((len(sentence), self.hidden_size))
+        #print(len(sentence), self.hidden_size, len(self.convolution_lookup[0]))
+
+        # first window
+        cdef np.ndarray window = padded_sentence[:self.word_window_size]
+        cdef np.ndarray input_data
+        input_data = np.concatenate(
+            [table[index]
+             for token_indices in window
+             for index, table in zip(token_indices,
+                                     self.feature_tables)
+             ]
+        )
+        self.convolution_lookup[0] = self.hidden_weights.dot(input_data)
+        if training:
+            # store the values of each input -- needed when adjusting features
+            self.input_sent_values[0] = input_data
+
+        cdef np.ndarray[FLOAT_t, ndim=1] new_data
+        for i, element in enumerate(padded_sentence[self.word_window_size:], 1):
+            new_data = np.concatenate([table[index] for
+                                       index, table in zip(element, self.feature_tables)])
+
+            # slide the window to the next element
+            input_data = np.concatenate((input_data[self.features_per_token:],
+                                         new_data))
+
+            #print(i, input_data)
+            self.convolution_lookup[i] = self.hidden_weights.dot(input_data)
+            if training:
+                self.input_sent_values[i] = input_data
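Note: the loop above avoids recomputing the whole window for every token; it drops the first token's features from the previous input vector and appends the next token's features. A standalone sketch of that sliding-window concatenation (toy feature table, made-up sizes):

import numpy as np

table = np.random.randn(10, 4)              # 10 vocabulary entries, 4 features each
sentence = np.array([[1], [3], [5], [7]])   # each token is a row of indices (one table here)
window = 3
features_per_token = 4

padded = np.vstack(([[0]], sentence, [[0]]))  # pad with index 0 on both sides
input_data = np.concatenate([table[idx[0]] for idx in padded[:window]])
windows = [input_data]
for element in padded[window:]:
    new_data = table[element[0]]
    input_data = np.concatenate((input_data[features_per_token:], new_data))
    windows.append(input_data)

print(len(windows), windows[0].shape)   # one window vector per token: 4 (12,)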
+