nltkor-1.2.14-cp311-cp311-macosx_13_0_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nltkor/Kor_char.py +193 -0
- nltkor/__init__.py +16 -0
- nltkor/alignment/__init__.py +1315 -0
- nltkor/cider/__init__.py +2 -0
- nltkor/cider/cider.py +55 -0
- nltkor/cider/cider_scorer.py +207 -0
- nltkor/distance/__init__.py +441 -0
- nltkor/distance/wasserstein.py +126 -0
- nltkor/etc.py +22 -0
- nltkor/lazyimport.py +144 -0
- nltkor/make_requirement.py +11 -0
- nltkor/metrics/__init__.py +63 -0
- nltkor/metrics/bartscore.py +301 -0
- nltkor/metrics/bertscore.py +331 -0
- nltkor/metrics/bleu_tensor.py +20 -0
- nltkor/metrics/classical.py +847 -0
- nltkor/metrics/entment.py +24 -0
- nltkor/metrics/eval.py +517 -0
- nltkor/metrics/mauve.py +273 -0
- nltkor/metrics/mauve_utils.py +131 -0
- nltkor/misc/__init__.py +11 -0
- nltkor/misc/string2string_basic_functions.py +59 -0
- nltkor/misc/string2string_default_tokenizer.py +83 -0
- nltkor/misc/string2string_hash_functions.py +159 -0
- nltkor/misc/string2string_word_embeddings.py +503 -0
- nltkor/search/__init__.py +10 -0
- nltkor/search/classical.py +569 -0
- nltkor/search/faiss_search.py +787 -0
- nltkor/search/kobert_tokenizer.py +181 -0
- nltkor/sejong/__init__.py +3 -0
- nltkor/sejong/__pycache__/__init__.cpython-38.pyc +0 -0
- nltkor/sejong/__pycache__/__init__.cpython-39.pyc +0 -0
- nltkor/sejong/__pycache__/sejong_download.cpython-38.pyc +0 -0
- nltkor/sejong/__pycache__/sejong_download.cpython-39.pyc +0 -0
- nltkor/sejong/__pycache__/ssem.cpython-38.pyc +0 -0
- nltkor/sejong/__pycache__/ssem.cpython-39.pyc +0 -0
- nltkor/sejong/ch.py +12 -0
- nltkor/sejong/dict_semClassNum.txt +491 -0
- nltkor/sejong/layer.txt +630 -0
- nltkor/sejong/sejong_download.py +87 -0
- nltkor/sejong/ssem.py +684 -0
- nltkor/similarity/__init__.py +3 -0
- nltkor/similarity/bartscore____.py +337 -0
- nltkor/similarity/bertscore____.py +339 -0
- nltkor/similarity/classical.py +245 -0
- nltkor/similarity/cosine_similarity.py +175 -0
- nltkor/tag/__init__.py +71 -0
- nltkor/tag/__pycache__/__init__.cpython-38.pyc +0 -0
- nltkor/tag/__pycache__/__init__.cpython-39.pyc +0 -0
- nltkor/tag/__pycache__/espresso_tag.cpython-38.pyc +0 -0
- nltkor/tag/__pycache__/espresso_tag.cpython-39.pyc +0 -0
- nltkor/tag/espresso_tag.py +220 -0
- nltkor/tag/libs/__init__.py +10 -0
- nltkor/tag/libs/__pycache__/__init__.cpython-38.pyc +0 -0
- nltkor/tag/libs/__pycache__/__init__.cpython-39.pyc +0 -0
- nltkor/tag/libs/__pycache__/attributes.cpython-38.pyc +0 -0
- nltkor/tag/libs/__pycache__/attributes.cpython-39.pyc +0 -0
- nltkor/tag/libs/__pycache__/config.cpython-38.pyc +0 -0
- nltkor/tag/libs/__pycache__/config.cpython-39.pyc +0 -0
- nltkor/tag/libs/__pycache__/metadata.cpython-38.pyc +0 -0
- nltkor/tag/libs/__pycache__/metadata.cpython-39.pyc +0 -0
- nltkor/tag/libs/__pycache__/reader.cpython-38.pyc +0 -0
- nltkor/tag/libs/__pycache__/reader.cpython-39.pyc +0 -0
- nltkor/tag/libs/__pycache__/taggers.cpython-38.pyc +0 -0
- nltkor/tag/libs/__pycache__/taggers.cpython-39.pyc +0 -0
- nltkor/tag/libs/__pycache__/utils.cpython-38.pyc +0 -0
- nltkor/tag/libs/__pycache__/utils.cpython-39.pyc +0 -0
- nltkor/tag/libs/__pycache__/word_dictionary.cpython-38.pyc +0 -0
- nltkor/tag/libs/__pycache__/word_dictionary.cpython-39.pyc +0 -0
- nltkor/tag/libs/arguments.py +280 -0
- nltkor/tag/libs/attributes.py +231 -0
- nltkor/tag/libs/config.py +159 -0
- nltkor/tag/libs/metadata.py +129 -0
- nltkor/tag/libs/ner/__init__.py +2 -0
- nltkor/tag/libs/ner/__pycache__/__init__.cpython-38.pyc +0 -0
- nltkor/tag/libs/ner/__pycache__/__init__.cpython-39.pyc +0 -0
- nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-38.pyc +0 -0
- nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-39.pyc +0 -0
- nltkor/tag/libs/ner/macmorphoreader.py +7 -0
- nltkor/tag/libs/ner/ner_reader.py +92 -0
- nltkor/tag/libs/network.c +72325 -0
- nltkor/tag/libs/network.cpython-311-darwin.so +0 -0
- nltkor/tag/libs/network.pyx +878 -0
- nltkor/tag/libs/networkconv.pyx +1028 -0
- nltkor/tag/libs/networkdependencyconv.pyx +451 -0
- nltkor/tag/libs/parse/__init__.py +1 -0
- nltkor/tag/libs/parse/__pycache__/__init__.cpython-38.pyc +0 -0
- nltkor/tag/libs/parse/__pycache__/__init__.cpython-39.pyc +0 -0
- nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-38.pyc +0 -0
- nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-39.pyc +0 -0
- nltkor/tag/libs/parse/parse_reader.py +283 -0
- nltkor/tag/libs/pos/__init__.py +2 -0
- nltkor/tag/libs/pos/__pycache__/__init__.cpython-38.pyc +0 -0
- nltkor/tag/libs/pos/__pycache__/__init__.cpython-39.pyc +0 -0
- nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-38.pyc +0 -0
- nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-39.pyc +0 -0
- nltkor/tag/libs/pos/macmorphoreader.py +7 -0
- nltkor/tag/libs/pos/pos_reader.py +97 -0
- nltkor/tag/libs/reader.py +485 -0
- nltkor/tag/libs/srl/__init__.py +3 -0
- nltkor/tag/libs/srl/__pycache__/__init__.cpython-38.pyc +0 -0
- nltkor/tag/libs/srl/__pycache__/__init__.cpython-39.pyc +0 -0
- nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-38.pyc +0 -0
- nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-39.pyc +0 -0
- nltkor/tag/libs/srl/__pycache__/train_srl.cpython-38.pyc +0 -0
- nltkor/tag/libs/srl/__pycache__/train_srl.cpython-39.pyc +0 -0
- nltkor/tag/libs/srl/__srl_reader_.py +535 -0
- nltkor/tag/libs/srl/srl_reader.py +436 -0
- nltkor/tag/libs/srl/train_srl.py +87 -0
- nltkor/tag/libs/taggers.py +926 -0
- nltkor/tag/libs/utils.py +384 -0
- nltkor/tag/libs/word_dictionary.py +239 -0
- nltkor/tag/libs/wsd/__init__.py +2 -0
- nltkor/tag/libs/wsd/__pycache__/__init__.cpython-38.pyc +0 -0
- nltkor/tag/libs/wsd/__pycache__/__init__.cpython-39.pyc +0 -0
- nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-38.pyc +0 -0
- nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-39.pyc +0 -0
- nltkor/tag/libs/wsd/macmorphoreader.py +7 -0
- nltkor/tag/libs/wsd/wsd_reader.py +93 -0
- nltkor/tokenize/__init__.py +62 -0
- nltkor/tokenize/ko_tokenize.py +115 -0
- nltkor/trans.py +121 -0
- nltkor-1.2.14.dist-info/LICENSE.txt +1093 -0
- nltkor-1.2.14.dist-info/METADATA +41 -0
- nltkor-1.2.14.dist-info/RECORD +127 -0
- nltkor-1.2.14.dist-info/WHEEL +5 -0
- nltkor-1.2.14.dist-info/top_level.txt +1 -0
nltkor/tag/libs/networkconv.pyx
@@ -0,0 +1,1028 @@
+# -*- coding: utf-8 -*-
+
+"""
+A convolutional neural network for NLP tagging tasks like SRL.
+It employs feature tables to store feature vectors for each token.
+"""
+
+import numpy as np
+cimport numpy as np
+
+cdef class ConvolutionalNetwork(Network):
+
+    # transition and distance feature tables
+    cdef public np.ndarray target_dist_table, pred_dist_table
+    cdef readonly np.ndarray target_dist_weights, pred_dist_weights
+    cdef readonly int target_dist_offset, pred_dist_offset
+    cdef readonly np.ndarray target_dist_lookup, pred_dist_lookup
+    cdef readonly np.ndarray target_convolution_lookup, pred_convolution_lookup
+    cdef readonly np.ndarray target_dist_deltas, pred_dist_deltas
+
+    # the second hidden layer
+    cdef readonly int hidden2_size
+    cdef readonly np.ndarray hidden2_weights, hidden2_bias
+    cdef readonly np.ndarray hidden2_values
+    cdef readonly np.ndarray hidden2_before_activation, hidden_before_activation
+
+    # lookup of convolution values (the same for each sentence, used to save time)
+    cdef np.ndarray convolution_lookup
+
+    # maximum convolution indices
+    cdef readonly np.ndarray max_indices
+
+    # number of targets (all tokens in a sentence or the provided arguments)
+    # and variables for argument classifying
+    cdef int num_targets
+    cdef bool only_classify
+
+    # for faster access
+    cdef int half_window
+
+    # the convolution gradients
+    cdef np.ndarray hidden_gradients, hidden2_gradients
+    cdef np.ndarray input_deltas
+
+    # keeping statistics
+    cdef int num_sentences
+
+    # validation
+    cdef list validation_predicates, validation_arguments
+
+    @classmethod
+    def create_new(cls, feature_tables, target_dist_table, pred_dist_table,
+                   int word_window, int hidden1_size, int hidden2_size, int output_size):
+        """Creates a new convolutional neural network."""
+        # sum the number of features in all tables except for distance
+        cdef int input_size = sum(table.shape[1] for table in feature_tables)
+        input_size *= word_window
+
+        dist_features_per_token = target_dist_table.shape[1] + pred_dist_table.shape[1]
+        input_size_with_distance = input_size + (word_window * dist_features_per_token)
+
+        # creates the weight matrices
+        high = 2.38 / np.sqrt(input_size_with_distance) # [Bottou-88]
+        hidden_weights = np.random.uniform(-high, high, (hidden1_size, input_size))
+
+        num_dist_features = word_window * target_dist_table.shape[1]
+        target_dist_weights = np.random.uniform(-high, high, (num_dist_features, hidden1_size))
+        num_dist_features = word_window * pred_dist_table.shape[1]
+        pred_dist_weights = np.random.uniform(-high, high, (num_dist_features, hidden1_size))
+
+        high = 2.38 / np.sqrt(hidden1_size)
+        hidden_bias = np.random.uniform(-high, high, hidden1_size)
+
+        if hidden2_size > 0:
+            hidden2_weights = np.random.uniform(-high, high, (hidden2_size, hidden1_size))
+            high = 2.38 / np.sqrt(hidden2_size)
+            hidden2_bias = np.random.uniform(-high, high, hidden2_size)
+            output_dim = (output_size, hidden2_size)
+        else:
+            hidden2_weights = None
+            hidden2_bias = None
+            output_dim = (output_size, hidden1_size)
+
+        high = 2.38 / np.sqrt(output_dim[1])
+        output_weights = np.random.uniform(-high, high, output_dim)
+        high = 2.38 / np.sqrt(output_size)
+        output_bias = np.random.uniform(-high, high, output_size)
+
+        net = cls(word_window, input_size, hidden1_size, hidden2_size,
+                  output_size, hidden_weights, hidden_bias,
+                  target_dist_weights, pred_dist_weights,
+                  hidden2_weights, hidden2_bias,
+                  output_weights, output_bias)
+        net.feature_tables = feature_tables
+        net.target_dist_table = target_dist_table
+        net.pred_dist_table = pred_dist_table
+
+        return net
+
+    def description(self):
+        """Returns a textual description of the network."""
+        hidden2_size = 0 if self.hidden2_weights is None else self.hidden2_size
+        table_dims = [str(t.shape[1]) for t in self.feature_tables]
+        table_dims = ', '.join(table_dims)
+
+        dist_table_dims = '%d, %d' % (self.target_dist_table.shape[1], self.pred_dist_table.shape[1])
+
+        desc = """
+Word window size: %d
+Feature table sizes: %s
+Distance table sizes (target and predicate): %s
+Input layer size: %d
+Convolution layer size: %d
+Second hidden layer size: %d
+Output size: %d
+""" % (self.word_window_size, table_dims, dist_table_dims, self.input_size, self.hidden_size,
+       hidden2_size, self.output_size)
+
+        return desc
+
+
+    def __init__(self, word_window, input_size, hidden1_size, hidden2_size,
+                 output_size, hidden1_weights, hidden1_bias, target_dist_weights,
+                 pred_dist_weights, hidden2_weights, hidden2_bias,
+                 output_weights, output_bias):
+        super(ConvolutionalNetwork, self).__init__(word_window, input_size,
+                                                   hidden1_size, output_size,
+                                                   hidden1_weights, hidden1_bias,
+                                                   output_weights, output_bias)
+        self.half_window = word_window / 2
+        self.features_per_token = self.input_size / word_window
+
+        self.transitions = None
+        self.target_dist_lookup = None
+        self.pred_dist_lookup = None
+        self.target_dist_weights = target_dist_weights
+        self.pred_dist_weights = pred_dist_weights
+
+        self.hidden2_size = hidden2_size
+        self.hidden2_weights = hidden2_weights
+        self.hidden2_bias = hidden2_bias
+
+        self.validation_predicates = None
+        self.validation_arguments = None
+
+        self.use_learning_rate_decay = False
+
+    def _generate_save_dict(self):
+        """
+        Generates a dictionary with all parameters saved by the model.
+        It is directly used by the numpy savez function.
+        """
+        d = dict(hidden_weights=self.hidden_weights,
+                 target_dist_table=self.target_dist_table,
+                 pred_dist_table=self.pred_dist_table,
+                 target_dist_weights=self.target_dist_weights,
+                 pred_dist_weights=self.pred_dist_weights,
+                 output_weights=self.output_weights,
+                 transitions=self.transitions,
+                 hidden_bias=self.hidden_bias, output_bias=self.output_bias,
+                 word_window_size=self.word_window_size,
+                 input_size=self.input_size, hidden_size=self.hidden_size,
+                 output_size=self.output_size, hidden2_size=self.hidden2_size,
+                 hidden2_weights=self.hidden2_weights, hidden2_bias=self.hidden2_bias,
+                 padding_left=self.padding_left, padding_right=self.padding_right,
+                 feature_tables=self.feature_tables)
+        return d
+
+    def save(self):
+        """
+        Saves the neural network to a file.
+        It will save the weights, biases, sizes, padding and
+        distance tables, and other feature tables.
+        """
+        data = self._generate_save_dict()
+        np.savez(self.network_filename, **data)
+
+    @classmethod
+    def _load_from_file(cls, data, filename):
+        """
+        Internal method for setting data read from a npz file.
+        """
+        # cython classes don't have the __dict__ attribute
+        # so we can't do an elegant self.__dict__.update(data)
+        hidden_weights = data['hidden_weights']
+        hidden_bias = data['hidden_bias']
+        hidden2_weights = data['hidden2_weights']
+
+        hidden2_bias = data['hidden2_bias']
+        output_weights = data['output_weights']
+        output_bias = data['output_bias']
+
+        word_window_size = data['word_window_size']
+        input_size = data['input_size']
+        hidden_size = data['hidden_size']
+        hidden2_size = data['hidden2_size']
+        output_size = data['output_size']
+
+        # numpy stores None as an array containing None and with empty shape
+        if hidden2_weights.shape == ():
+            hidden2_weights = None
+            hidden2_size = 0
+            hidden2_bias = None
+
+        nn = cls(word_window_size, input_size, hidden_size, hidden2_size,
+                 output_size, hidden_weights, hidden_bias,
+                 data['target_dist_weights'], data['pred_dist_weights'],
+                 hidden2_weights, hidden2_bias,
+                 output_weights, output_bias)
+
+        nn.target_dist_table = data['target_dist_table']
+        nn.pred_dist_table = data['pred_dist_table']
+        #transitions = data['transitions']
+        #nn.transitions = transitions if transitions.shape != () else None
+        nn.padding_left = data['padding_left']
+        nn.padding_right = data['padding_right']
+        nn.pre_padding = np.array(int(nn.word_window_size // 2) * [nn.padding_left])
+        nn.pos_padding = np.array(int(nn.word_window_size // 2) * [nn.padding_right])
+        nn.feature_tables = list(data['feature_tables'])
+        nn.network_filename = filename
+
+        return nn
+
+    @classmethod
+    def load_from_file(cls, filename):
+        """
+        Loads the neural network from a file.
+        It will load weights, biases, sizes, padding and
+        distance tables, and other feature tables.
+        """
+        data = np.load(filename, allow_pickle=True) # 수정
+        return cls._load_from_file(data, filename)
+
+    def _load_parameters(self):
+        """
+        Loads weights, feature tables, distance tables and
+        transition tables previously saved.
+        """
+        data = np.load(self.network_filename)
+        self.hidden_weights = data['hidden_weights']
+        self.hidden_bias = data['hidden_bias']
+        self.output_weights = data['output_weights']
+        self.output_bias = data['output_bias']
+        self.feature_tables = list(data['feature_tables'])
+        self.target_dist_table = data['target_dist_table']
+        self.pred_dist_table = data['pred_dist_table']
+
+        # check if transitions isn't None (numpy saves everything as an array)
+        if data['transitions'].shape != ():
+            self.transitions = data['transitions']
+        else:
+            self.transitions = None
+
+        # same for second hidden layer weights
+        if data['hidden2_weights'].shape != ():
+            self.hidden2_weights = data['hidden2_weights']
+            self.hidden2_bias = data['hidden2_bias']
+        else:
+            self.hidden2_weights = None
+
+    def set_validation_data(self, list validation_sentences,
+                            list validation_predicates,
+                            list validation_tags,
+                            list validation_arguments=None):
+        """
+        Sets the data to be used in validation during training. If this function
+        is not called before training, the training data itself is used to
+        measure the model's performance.
+        """
+        self.validation_sentences = validation_sentences
+        self.validation_predicates = validation_predicates
+        self.validation_tags = validation_tags
+        self.validation_arguments = validation_arguments
+
+    def train(self, list sentences, list predicates, list tags,
+              int epochs, int epochs_between_reports=0,
+              float desired_accuracy=0, list arguments=None):
+        """
+        Trains the convolutional network. Refer to the basic Network
+        train method for detailed explanation.
+
+        :param predicates: a list of 1-dim numpy array
+            indicating the indices of predicates in each sentence.
+        :param arguments: (only for argument classifying) a list of 2-dim
+            numpy arrays indicating the start and end of each argument.
+        """
+        self.num_sentences = len(sentences)
+        self.num_tokens = sum(len(sent) for sent in sentences)
+        self.only_classify = arguments is not None
+
+        logger = logging.getLogger("Logger")
+        logger.info("Training for up to %d epochs" % epochs)
+        last_accuracy = 0
+        top_accuracy = 0
+        last_error = np.Infinity
+
+        if self.validation_sentences is None:
+            self.set_validation_data(sentences, predicates, tags, arguments)
+
+        for i in xrange(epochs):
+            self.decrease_learning_rates(i)
+            self._train_epoch(sentences, predicates, tags, arguments)
+            self._validate()
+
+            # Attardi: save model
+            if self.accuracy > top_accuracy:
+                top_accuracy = self.accuracy
+                self.save()
+                logger.debug("Saved model")
+            elif self.use_learning_rate_decay:
+                # this iteration didn't bring improvements; load the last saved model
+                # before continuing training with a lower rate
+                self._load_parameters()
+
+            if (epochs_between_reports > 0 and i % epochs_between_reports == 0) \
+                    or self.accuracy >= desired_accuracy > 0 \
+                    or (self.accuracy < last_accuracy and self.error > last_error):
+
+                self._print_epoch_report(i + 1)
+
+                if self.accuracy >= desired_accuracy > 0\
+                        or (self.accuracy < last_accuracy and self.error > last_error):
+                    # accuracy is falling, the network is probably diverging
+                    # or overfitting
+                    break
+
+            last_accuracy = self.accuracy
+            last_error = self.error
+
+        self.num_sentences = 0
+        self.num_tokens = 0
+        self._reset_counters()
+
+    def _reset_counters(self):
+        """
+        Reset the performance statistics counters. They are updated during
+        each epoch.
+        """
+        self.error = 0
+        self.skips = 0
+        self.float_errors = 0
+
+    def _shuffle_data(self, sentences, predicates, tags, arguments=None):
+        """
+        Shuffle the given training data in place.
+        """
+        # get the random number generator state in order to shuffle
+        # sentences and their tags in the same order
+        random_state = np.random.get_state()
+        np.random.shuffle(sentences)
+        np.random.set_state(random_state)
+        np.random.shuffle(predicates)
+        np.random.set_state(random_state)
+        np.random.shuffle(tags)
+        if arguments is not None:
+            np.random.set_state(random_state)
+            np.random.shuffle(arguments)
+
+
+    def _train_epoch(self, sentences, predicates, tags, arguments):
+        """Trains for one epoch with all examples."""
+
+        self._reset_counters()
+        self._shuffle_data(sentences, predicates, tags, arguments)
+        if arguments is not None:
+            i_args = iter(arguments)
+        else:
+            sent_args = None
+
+        for sent, sent_preds, sent_tags in zip(sentences, predicates, tags):
+            if arguments is not None:
+                sent_args = next(i_args)
+                #sent_args = i_args.next()
+
+            try:
+                self._tag_sentence(sent, sent_preds, sent_tags, sent_args)
+            except FloatingPointError:
+                # just ignore the sentence in case of an overflow
+                self.float_errors += 1
+
+    def tag_sentence(self, np.ndarray sentence, np.ndarray predicates,
+                     list arguments=None, bool logprob=False,
+                     bool allow_repeats=True):
+        """
+        Runs the network for each element in the sentence and returns
+        the sequence of tags.
+
+        :param sentence: a 2-dim numpy array, where each item encodes a token.
+        :param predicates: a 1-dim numpy array, indicating the position
+            of the predicates in the sentence
+        :param logprob: a boolean indicating whether to return the
+            log-probability for each answer or not.
+        :param allow_repeats: a boolean indicating whether to allow repeated
+            argument classes (only for separate argument classification).
+        """
+        self.only_classify = arguments is not None
+        return self._tag_sentence(sentence, predicates, argument_blocks=arguments,
+                                  logprob=logprob, allow_repeats=allow_repeats)
+
+    cdef np.ndarray argument_distances(self, positions, argument):
+        """
+        Calculates the distance from each token in the sentence to the argument.
+        """
+        distances = positions.copy()
+
+        # the ones before the argument
+        #print(argument)
+        lo = np.less(positions, argument[0])
+        distances[lo] -= argument[0]
+
+        # the ones after the argument
+        hi = np.greater(positions, argument[1])
+        distances[hi] -= argument[1]
+
+        # the ones inside the argument
+        distances[np.logical_not(hi | lo)] = 0
+
+        return distances
+
+
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _sentence_convolution(self, sentence, predicate, argument_blocks=None,
+                              training=False):
+        """
+        Perform the convolution for a given predicate.
+
+        :param sentence: a sequence of tokens, each represented as an array of
+            indices
+        :param predicate: the index of the predicate in the sentence
+        :param argument_blocks: (used only in SRL argument classification) the
+            starting and end positions of all delimited arguments
+        :return: the scores for all tokens with respect to the given predicate
+        """
+        # store the values found by each convolution neuron here and then find the max
+        cdef np.ndarray[FLOAT_t, ndim=2] convolution_values
+
+        # a priori scores for all tokens
+        cdef np.ndarray[FLOAT_t, ndim=2] scores
+
+        # intermediate storage
+        cdef np.ndarray[FLOAT_t, ndim=2] input_and_pred_dist_values
+
+        self.num_targets = len(sentence) if argument_blocks is None else len(argument_blocks)
+
+        # maximum values found by convolution
+        self.hidden_values = np.zeros((self.num_targets, self.hidden_size))
+
+        if training:
+            # hidden sent values: results after tanh
+            self.hidden_values = np.zeros((self.num_targets, self.hidden_size))
+            self.max_indices = np.empty((self.num_targets, self.hidden_size), int)
+            #self.max_indices = np.empty((self.num_targets, self.hidden_size), np.int)
+
+        # predicate distances are the same across all targets
+        pred_dist_indices = np.arange(len(sentence)) - predicate
+        pred_dist_values = self.pred_convolution_lookup.take(pred_dist_indices + self.pred_dist_offset,
+                                                             0, mode='clip')
+
+        input_and_pred_dist_values = pred_dist_values + self.convolution_lookup
+
+        for target in range(self.num_targets):
+            # loop over targets and add the weighted distance features to each token
+            # this is necessary for the convolution layer
+
+            # distance features for each window
+            # if we are classifying all tokens, pick the distance to the target
+            # if we are classifying arguments, pick the distance to the closest boundary
+            # of the argument (beginning or end)
+            if argument_blocks is None:
+                target_dist_indices = np.arange(len(sentence)) - target
+            else:
+                argument = argument_blocks[target]
+                #print(argument_blocks)
+                target_dist_indices = self.argument_distances(np.arange(len(sentence)), argument)
+
+            target_dist_values = self.target_convolution_lookup.take(target_dist_indices + self.target_dist_offset,
+                                                                     0, mode='clip')
+
+            convolution_values = target_dist_values + input_and_pred_dist_values
+
+            # now, find the maximum values
+            if training:
+                self.max_indices[target] = convolution_values.argmax(0)
+            self.hidden_values[target] = convolution_values.max(0)
+
+        # apply the bias and proceed to the next layer
+        self.hidden_values += self.hidden_bias
+
+        if self.hidden2_weights is not None:
+            self.hidden2_values = self.hidden_values.dot(self.hidden2_weights.T) + self.hidden2_bias
+
+            if training:
+                self.hidden2_before_activation = self.hidden2_values.copy()
+
+            hardtanh(self.hidden2_values, inplace=True)
+        else:
+            # apply non-linearity here
+            if training:
+                self.hidden_before_activation = self.hidden_values.copy()
+
+            self.hidden2_values = self.hidden_values
+            hardtanh(self.hidden_values, inplace=True)
+
+        scores = self.hidden2_values.dot(self.output_weights.T) + self.output_bias
+
+        return scores
+
+    def _pre_tagging_setup(self, np.ndarray sentence, bool training):
+        """
+        Perform some initialization actions before the actual tagging.
+        """
+        if training:
+            # this table will store the values of the neurons for each input token
+            # they will be needed during weight adjustments
+            self.input_sent_values = np.empty((len(sentence), self.input_size))
+
+        # store the convolution values to save time
+        self._create_convolution_lookup(sentence, training)
+
+        if self.target_dist_lookup is None: self._create_target_lookup()
+        if self.pred_dist_lookup is None: self._create_pred_lookup()
+
+
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _tag_sentence(self, np.ndarray sentence, np.ndarray predicates,
+                      list tags=None, list argument_blocks=None,
+                      bool allow_repeats=True, bool logprob=False):
+        """
+        Runs the network for every predicate in the sentence.
+        Refer to the Network class for more information.
+
+        :param tags: this is a list rather than a numpy array because in
+        argument classification, each predicate may have a differente number
+        of arguments.
+        :param argument_blocks: (used only in SRL argument classification) a list
+        with the starting and end positions of all delimited arguments (one for
+        each predicate)
+        :param predicates: a numpy array with the indices of the predicates in the sentence.
+        """
+        answer = []
+        training = tags is not None
+        self._pre_tagging_setup(sentence, training)
+        cdef np.ndarray[FLOAT_t, ndim=2] token_scores
+
+        for i, predicate in enumerate(predicates):
+            pred_arguments = None if not self.only_classify else argument_blocks[i]
+            pred_arguments = argument_blocks[i]
+
+            token_scores = self._sentence_convolution(sentence, predicate, pred_arguments, training)
+            pred_answer = self._viterbi(token_scores, allow_repeats)
+
+            if training:
+                pred_tags = tags[i]
+                if self._calculate_gradients(pred_tags, token_scores):
+                    self._backpropagate()
+                    self._calculate_input_deltas(sentence, predicate, pred_arguments)
+                    self._adjust_weights(predicate, pred_arguments)
+                    self._adjust_features(sentence, predicate)
+
+            if logprob:
+                if self.only_classify:
+                    raise NotImplementedError('Confidence measure not implemented for argument classifying')
+
+                all_scores = self._calculate_all_scores(token_scores)
+                last_token = len(sentence) - 1
+                logadd = np.log(np.sum(np.exp(all_scores[last_token])))
+                confidence = self.answer_score - logadd
+                pred_answer = (pred_answer, confidence)
+
+            answer.append(pred_answer)
+
+        return answer
+
+    def _validate(self):
+        """
+        Evaluates the network performance, updating its hits count.
+        """
+        # call it "item" instead of token because the same token may be counted
+        # more than once (sentences with multiple predicates)
+        num_items = 0
+        hits = 0
+
+        if self.validation_arguments is not None:
+            i_args = iter(self.validation_arguments)
+        else:
+            sent_args = None
+
+        for sent, sent_preds, sent_tags in zip(self.validation_sentences,
+                                               self.validation_predicates,
+                                               self.validation_tags):
+            if self.validation_arguments is not None:
+                sent_args = next(i_args)
+                #sent_args = i_args.next()
+
+            answer = self._tag_sentence(sent, sent_preds, None, sent_args)
+            for predicate_answer, predicate_tags in zip(answer, sent_tags):
+                for net_tag, gold_tag in zip(predicate_answer, predicate_tags):
+                    if net_tag == gold_tag:
+                        hits += 1
+
+                num_items += len(predicate_answer)
+
+        self.accuracy = float(hits) / num_items
+        # normalize error
+        self.error /= num_items
+
+    def _calculate_gradients(self, tags, scores):
+        """Delegates the call to the appropriate function."""
+        if self.only_classify:
+            return self._calculate_gradients_classify(tags, scores)
+        else:
+            return self._calculate_gradients_sll(tags, scores)
+
+    def _calculate_gradients_classify(self, tags, scores):
+        """
+        Calculates the output deltas for each target in a network that only
+        classifies predelimited arguments.
+        The aim is to minimize the cost, for each argument:
+        logadd(score for all possible tags) - score(correct tag)
+
+        :returns: whether a correction is necessary or not.
+        """
+        self.net_gradients = np.zeros_like(scores, float)
+        #self.net_gradients = np.zeros_like(scores, np.float)
+        correction = False
+
+        for i, tag_scores in enumerate(scores):
+            tag = tags[i]
+
+            exponentials = np.exp(tag_scores)
+            exp_sum = np.sum(exponentials)
+            logadd = np.log(exp_sum)
+
+            # update the total error
+            error = logadd - tag_scores[tag]
+            self.error += error
+
+            # like the non-convolutional network, don't adjust weights if the error
+            # is too low. An error of 0.01 means a log-prob of -0.01 for the right
+            # tag, i.e., more than 99% probability
+            if error <= 0.01:
+                self.skips += 1
+                continue
+
+            correction = True
+            self.net_gradients[i] = - exponentials / exp_sum
+            self.net_gradients[i, tag] += 1
+
+        return correction
+
+    def _backpropagate(self):
+        """Backpropagates the error gradient."""
+
+        # this function only determines the gradients at each layer, without
+        # adjusting weights. This is done because the input features must
+        # be adjusted with the first weight matrix unchanged.
+
+        # gradient[i][j] has the gradient for token i at neuron j
+
+        # derivative with respect to the non-linearity layer (tanh)
+        dCd_tanh = self.net_gradients.dot(self.output_weights)
+
+        if self.hidden2_weights is not None:
+            # derivative with respect to the second hidden layer
+            dCd_hidden2 = dCd_tanh * hardtanhd(self.hidden2_before_activation)
+            self.hidden2_gradients = dCd_hidden2
+
+            self.hidden_gradients = self.hidden2_gradients.dot(self.hidden2_weights)
+        else:
+            # the non-linearity appears right after the convolution max
+            self.hidden_gradients = dCd_tanh * hardtanhd(self.hidden_before_activation)
+
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _adjust_weights(self, predicate, arguments=None):
+        """
+        Adjusts the network weights after gradients have been calculated.
+        """
+        cdef int i
+        cdef np.ndarray[FLOAT_t, ndim=1] gradients_t
+        cdef np.ndarray[FLOAT_t, ndim=2] last_values, deltas, grad_matrix, input_values
+
+        last_values = self.hidden2_values if self.hidden2_weights is not None else self.hidden_values
+        deltas = self.net_gradients.T.dot(last_values) * self.learning_rate
+        self.output_weights += deltas
+        self.output_bias += self.net_gradients.sum(0) * self.learning_rate
+
+        if self.hidden2_weights is not None:
+            deltas = self.hidden2_gradients.T.dot(self.hidden_values) * self.learning_rate
+            self.hidden2_weights += deltas
+            self.hidden2_bias += self.hidden2_gradients.sum(0) * self.learning_rate
+
+        # now adjust weights from input to convolution. these will be trickier.
+        # we need to know which input value to use in the delta formula
+
+        # I tried vectorizing this loop but it got a bit slower, probably because
+        # of the overhead in building matrices/tensors with the max indices
+        for i, neuron_maxes in enumerate(self.max_indices):
+            # i indicates the i-th target
+
+            gradients_t = self.hidden_gradients[i] * self.learning_rate
+
+            # table containing in each line the input values selected for each convolution neuron
+            input_values = self.input_sent_values.take(neuron_maxes, 0)
+
+            # stack the gradients to multiply all weights for a neuron
+            grad_matrix = np.tile(gradients_t, [self.input_size, 1]).T
+            self.hidden_weights += grad_matrix * input_values
+
+            # target distance weights
+            # get the relative distance from each max token to its target
+            if arguments is None:
+                target_dists = neuron_maxes - i
+            else:
+                argument = arguments[i]
+                target_dists = self.argument_distances(neuron_maxes, argument)
+
+            dist_features = self.target_dist_lookup.take(target_dists + self.target_dist_offset,
+                                                         0, mode='clip')
+            grad_matrix = np.tile(gradients_t, [self.target_dist_weights.shape[0], 1]).T
+            self.target_dist_weights += (grad_matrix * dist_features).T
+
+            # predicate distance weights
+            # get the distance from each max token to its predicate
+            pred_dists = neuron_maxes - predicate
+            dist_features = self.pred_dist_lookup.take(pred_dists + self.pred_dist_offset,
+                                                       0, mode='clip')
+            # try to recycle the grad_matrix if sizes match
+            if self.target_dist_weights.shape[0] != self.pred_dist_weights.shape[0]:
+                grad_matrix = np.tile(gradients_t, [self.pred_dist_weights.shape[0], 1]).T
+
+            self.pred_dist_weights += (grad_matrix * dist_features).T
+
+        self.hidden_bias += self.hidden_gradients.sum(0) * self.learning_rate
+
+        # Adjusts the transition scores table with the calculated gradients.
+        if not self.only_classify and self.transitions is not None:
+            self.transitions += self.trans_gradients * self.learning_rate_trans
+
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def _calculate_input_deltas(self, np.ndarray sentence, int predicate,
+                                object arguments=None):
+        """
+        Calculates the input deltas to be applied in the feature tables.
+        """
+        cdef np.ndarray[FLOAT_t, ndim=2] hidden_gradients, input_gradients
+        cdef np.ndarray[FLOAT_t, ndim=2] target_dist_gradients, pred_dist_gradients
+        cdef np.ndarray[FLOAT_t, ndim=1] gradients
+        cdef np.ndarray[INT_t, ndim=1] convolution_max, target_dists
+
+        # matrices accumulating gradients over each target
+        # each matrix has a whole window in each line
+        input_gradients = np.zeros((len(sentence), self.hidden_size))
+        target_dist_gradients = np.zeros((self.target_dist_lookup.shape[0], self.hidden_size))
+        pred_dist_gradients = np.zeros((self.pred_dist_lookup.shape[0], self.hidden_size))
+
+        # avoid multiplying by the learning rate multiple times
+        hidden_gradients = self.hidden_gradients * self.learning_rate_features
+        cdef np.ndarray[INT_t, ndim=1] column_numbers = np.arange(self.hidden_size)
+
+        for target in range(self.num_targets):
+
+            # array with the tokens that yielded the maximum value in each neuron
+            # for this target
+            convolution_max = self.max_indices[target]
+
+            if not self.only_classify:
+                target_dists = convolution_max - target
+            else:
+                argument = arguments[target]
+                target_dists = self.argument_distances(convolution_max, argument)
+
+            target_dists = np.clip(target_dists + self.target_dist_offset, 0,
+                                   self.target_dist_lookup.shape[0] - 1)
+            pred_dists = convolution_max - predicate
+            pred_dists = np.clip(pred_dists + self.pred_dist_offset, 0,
+                                 self.pred_dist_lookup.shape[0] - 1)
+
+            gradients = hidden_gradients[target]
+
+            # sparse matrix with gradients to be applied over the input
+            # line i has the gradients for the i-th token in the sentence
+            input_gradients[convolution_max, np.arange(self.hidden_size)] += gradients
+
+            # distance deltas
+            target_dist_gradients[target_dists, np.arange(self.hidden_size)] += gradients
+            pred_dist_gradients[pred_dists, np.arange(self.hidden_size)] += gradients
+
+        self.input_deltas = input_gradients.dot(self.hidden_weights)
+        self.target_dist_deltas = target_dist_gradients.dot(self.target_dist_weights.T)
+        self.pred_dist_deltas = pred_dist_gradients.dot(self.pred_dist_weights.T)
+
+
+    def _adjust_features(self, sentence, predicate):
+        """Adjusts the features in all feature tables."""
+        # compute each token in the window separately and
+        # separate the feature deltas into tables
+        start_from = 0
+        dist_target_from = 0
+        dist_pred_from = 0
+
+        # number of times that the minimum and maximum distances are repeated
+        # in the lookup distance tables
+        pre_dist = self.word_window_size
+        pos_dist = 1
+        if self.word_window_size > 1:
+            padded_sentence = np.concatenate((self.pre_padding,
+                                              sentence,
+                                              self.pos_padding))
+        else:
+            padded_sentence = sentence
+
+        for i in range(self.word_window_size):
+
+            for j, table in enumerate(self.feature_tables):
+                # this is the column for the i-th position in the window
+                # regarding features from the j-th table
+                table_deltas = self.input_deltas[:, start_from:start_from + table.shape[1]]
+                start_from += table.shape[1]
+
+                for token, deltas in zip(padded_sentence[i:], table_deltas):
+                    table[token[j]] += deltas
+
+            dist_deltas = self.target_dist_deltas[:, dist_target_from : dist_target_from + self.target_dist_table.shape[1] ]
+            pre_deltas = dist_deltas.take(np.arange(pre_dist), 0).sum(0)
+            pos_deltas = dist_deltas.take(np.arange(-pos_dist, 0), 0).sum(0)
+            self.target_dist_table[1:-1, :] += dist_deltas[pre_dist : -pos_dist]
+            self.target_dist_table[0] += pre_deltas
+            self.target_dist_table[-1] += pos_deltas
+            dist_target_from += self.target_dist_table.shape[1]
+
+            dist_deltas = self.pred_dist_deltas[:, dist_pred_from : dist_pred_from + self.pred_dist_table.shape[1] ]
+            pre_deltas = dist_deltas.take(np.arange(pre_dist), 0).sum(0)
+            pos_deltas = dist_deltas.take(np.arange(-pos_dist, 0), 0).sum(0)
+            self.pred_dist_table[1:-1, :] += dist_deltas[pre_dist : -pos_dist]
+            self.pred_dist_table[0] += pre_deltas
+            self.pred_dist_table[-1] += pos_deltas
+
+            pre_dist -= 1
+            pos_dist += 1
+            dist_pred_from += self.pred_dist_table.shape[1]
+
+        self._create_target_lookup()
+        self._create_pred_lookup()
+
+    @cython.boundscheck(False)
+    def _viterbi(self, np.ndarray[FLOAT_t, ndim=2] scores, bool allow_repeats=True):
+        """
+        Performs a Viterbi search over the scores for each tag using
+        the transitions matrix. If a matrix wasn't supplied,
+        it will return the tags with the highest scores individually.
+        """
+        if self.transitions is None:
+            best_scores = scores.argmax(1)
+
+            if allow_repeats:
+                return best_scores
+
+            # we must find the combination of tags that maximizes the probabilities
+            logadd = np.log(np.sum(np.exp(scores), 1))
+            logprobs = (scores.T - logadd).T
+            counts = np.bincount(best_scores)
+
+            while counts.max() != 1:
+                # find the tag with the most conflicting args
+                conflicting_tag = counts.argmax()
+
+                # arguments with that tag as current maximum
+                args = np.where(best_scores == conflicting_tag)[0]
+
+                # get the logprobs for those args having this tag
+                conflicting_probs = logprobs[args, conflicting_tag]
+
+                # find the argument with the highest probability for that tag
+                highest_prob_arg = args[conflicting_probs.argmax()]
+
+                # set the score for other arguments in that tag to a low value
+                other_args = args[args != highest_prob_arg]
+                scores[other_args, conflicting_tag] = -1000
+
+                # and find the new maxes, without recalculating probabilities
+                best_scores = scores.argmax(1)
+                counts = np.bincount(best_scores)
+
+            return best_scores
+
+        path_scores = np.empty_like(scores)
+        path_backtrack = np.empty_like(scores, int)
+        #path_backtrack = np.empty_like(scores, np.int)
+
+        # now the actual Viterbi algorithm
+        # first, get the scores for each tag at token 0
+        # the last row of the transitions table has the scores for the first tag
+        path_scores[0] = scores[0] + self.transitions[-1]
+
+        for i, token in enumerate(scores[1:], 1):
+
+            # each line contains the score until each tag t plus the transition to each other tag t'
+            prev_score_and_trans = (path_scores[i - 1] + self.transitions[:-1].T).T
+
+            # find the previous tag that yielded the max score
+            path_backtrack[i] = prev_score_and_trans.argmax(0)
+            path_scores[i] = prev_score_and_trans[path_backtrack[i],
+                                                  np.arange(self.output_size)] + scores[i]
+
+        # now find the maximum score for the last token and follow the backtrack
+        answer = np.empty(len(scores), dtype=int)
+        #answer = np.empty(len(scores), dtype=np.int)
+        answer[-1] = path_scores[-1].argmax()
+        self.answer_score = path_scores[-1][answer[-1]]
+        previous_tag = path_backtrack[-1][answer[-1]]
+
+        for i in range(len(scores) - 2, 0, -1):
+            answer[i] = previous_tag
+            previous_tag = path_backtrack[i][previous_tag]
+
+        answer[0] = previous_tag
+        return answer
+
+    def _create_target_lookup(self):
+        """
+        Creates a lookup table with the window value for each different distance
+        to the target token (target_dist_lookup) and one with the precomputed
+        values in the convolution layer (target_convolution_lookup).
+        """
+        # consider padding. if the table has 10 entries, with a word window of 3,
+        # we would have to consider up to the distance of 11, because of the padding.
+        num_distances = self.target_dist_table.shape[0] + self.word_window_size - 1
+        self.target_dist_lookup = np.empty((num_distances,
+                                            self.word_window_size * self.target_dist_table.shape[1]))
+        self.target_dist_offset = num_distances / 2
+        window_from = 0
+        window_to = self.target_dist_table.shape[1]
+        for i in range(self.word_window_size):
+            # each token in the window will is shifted in relation to the middle one
+            shift = i - self.half_window
+
+            # discount half window size because of the extra distances we added for padding
+            inds = np.arange(shift, num_distances + shift) - self.half_window
+            inds = np.clip(inds, 0, self.target_dist_table.shape[0] - 1)
+            self.target_dist_lookup[:,window_from : window_to] = self.target_dist_table[inds,]
+
+            window_from = window_to
+            window_to += self.target_dist_table.shape[1]
+
+        self.target_convolution_lookup = self.target_dist_lookup.dot(self.target_dist_weights)
+
+
+    def _create_pred_lookup(self):
+        """
+        Creates a lookup table with the window value for each different distance
+        to the predicate token (pred_dist_lookup) and one with the precomputed
+        values in the convolution layer (pred_convolution_lookup).
+        """
+        # consider padding. if the table has 10 entries, with a word window of 3,
+        # we would have to consider up to the distance of 11, because of the padding.
+        num_distances = self.pred_dist_table.shape[0] + self.word_window_size - 1
+        self.pred_dist_lookup = np.empty((num_distances,
+                                          self.word_window_size * self.pred_dist_table.shape[1]))
+        self.pred_dist_offset = num_distances / 2
+        window_from = 0
+        window_to = self.pred_dist_table.shape[1]
+        for i in range(self.word_window_size):
+            # each token in the window will is shifted in relation to the middle one
+            shift = i - self.half_window
+
+            # discount half window size because of the extra distances we added for padding
+            inds = np.arange(shift, num_distances + shift) - self.half_window
+            inds = np.clip(inds, 0, self.pred_dist_table.shape[0] - 1)
+            self.pred_dist_lookup[:,window_from : window_to] = self.pred_dist_table[inds,]
+
+            window_from = window_to
+            window_to += self.pred_dist_table.shape[1]
+
+        self.pred_convolution_lookup = self.pred_dist_lookup.dot(self.pred_dist_weights)
+
+    def _create_convolution_lookup(self, sentence, training):
+        """
+        Creates a lookup table storing the values found by each
+        convolutional neuron before summing distance features.
+        The table has the format len(sent) x len(convol layer)
+        Biases are not included.
+        """
+        cdef np.ndarray padded_sentence
+
+        # add padding to the sentence
+        if self.word_window_size > 1:
+            padded_sentence = np.vstack((self.pre_padding,
+                                         sentence,
+                                         self.pos_padding))
+        else:
+            padded_sentence = sentence
+
+        self.convolution_lookup = np.empty((len(sentence), self.hidden_size))
+        #print(len(sentence), self.hidden_size, len(self.convolution_lookup[0]))
+
+        # first window
+        cdef np.ndarray window = padded_sentence[:self.word_window_size]
+        cdef np.ndarray input_data
+        input_data = np.concatenate(
+            [table[index]
+             for token_indices in window
+             for index, table in zip(token_indices,
+                                     self.feature_tables)
+             ]
+            )
+        self.convolution_lookup[0] = self.hidden_weights.dot(input_data)
+        if training:
+            # store the values of each input -- needed when adjusting features
+            self.input_sent_values[0] = input_data
+
+        cdef np.ndarray[FLOAT_t, ndim=1] new_data
+        for i, element in enumerate(padded_sentence[self.word_window_size:], 1):
+            new_data = np.concatenate([table[index] for
+                                       index, table in zip(element, self.feature_tables)])
+
+            # slide the window to the next element
+            input_data = np.concatenate((input_data[self.features_per_token:],
+                                         new_data))
+
+            #print(i, input_data)
+            self.convolution_lookup[i] = self.hidden_weights.dot(input_data)
+            if training:
+                self.input_sent_values[i] = input_data
+