@ccmaymay/concrete 4.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ActiveLearnerClientService.js +249 -0
- package/ActiveLearnerServerService.js +642 -0
- package/AnnotateCommunicationService.js +696 -0
- package/AnnotateWithContextService.js +298 -0
- package/FeedbackService.js +750 -0
- package/FetchCommunicationService.js +709 -0
- package/LICENSE +31 -0
- package/ResultsServerService.js +2138 -0
- package/SearchProxyService.js +962 -0
- package/SearchService.js +685 -0
- package/Service.js +373 -0
- package/StoreCommunicationService.js +255 -0
- package/SummarizationService.js +479 -0
- package/access_types.js +168 -0
- package/annotate_types.js +26 -0
- package/audio_types.js +110 -0
- package/cluster_types.js +398 -0
- package/communication_fu.js +432 -0
- package/communication_types.js +845 -0
- package/concrete.js +65 -0
- package/context_types.js +65 -0
- package/email_types.js +477 -0
- package/entities_types.js +658 -0
- package/ex_types.js +82 -0
- package/language_types.js +123 -0
- package/learn_types.js +207 -0
- package/linking_types.js +286 -0
- package/metadata_types.js +926 -0
- package/nitf_types.js +1005 -0
- package/package.json +23 -0
- package/results_types.js +18 -0
- package/search_types.js +661 -0
- package/services_types.js +384 -0
- package/situations_types.js +1268 -0
- package/spans_types.js +151 -0
- package/structure_types.js +2311 -0
- package/summarization_types.js +433 -0
- package/tokenization_fu.js +33 -0
- package/tokentagging_fu.js +241 -0
- package/twitter_types.js +1553 -0
- package/util.js +118 -0
- package/uuid_types.js +67 -0
@@ -0,0 +1,433 @@
|
|
1
|
+
//
|
2
|
+
// Autogenerated by Thrift Compiler (0.15.0)
|
3
|
+
//
|
4
|
+
// DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
|
5
|
+
//
|
6
|
+
"use strict";
|
7
|
+
|
8
|
+
const thrift = require('thrift');
|
9
|
+
const Thrift = thrift.Thrift;
|
10
|
+
const Int64 = require('node-int64');
|
11
|
+
|
12
|
+
const communication_ttypes = require('./communication_types');
|
13
|
+
const services_ttypes = require('./services_types');
|
14
|
+
const structure_ttypes = require('./structure_types');
|
15
|
+
const uuid_ttypes = require('./uuid_types');
|
16
|
+
|
17
|
+
|
18
|
+
const ttypes = module.exports = {};
|
19
|
+
/**
 * Name-to-integer mapping for the SummarySourceType enumeration.
 */
ttypes.SummarySourceType = {
  DOCUMENT: 0,
  TOKENIZATION: 1,
  ENTITY: 2,
};
|
24
|
+
/**
 * Thrift struct SummarizationRequest.
 *
 * Fields (Thrift field id in brackets), all defaulting to null:
 *   [1] queryTerms          - list of strings
 *   [2] maximumTokens       - i32
 *   [3] maximumCharacters   - i32
 *   [4] sourceType          - i32
 *   [5] sourceIds           - list of uuid_ttypes.UUID structs
 *   [6] sourceCommunication - communication_ttypes.Communication struct
 */
const SummarizationRequest = module.exports.SummarizationRequest = class {
  /**
   * @param {Object} [args] - Optional initial field values. Fields that are
   *     null or undefined in args are left at null. Lists are copied with
   *     Thrift.copyList and sourceCommunication is wrapped in a new Communication.
   */
  constructor(args) {
    this.queryTerms = null;
    this.maximumTokens = null;
    this.maximumCharacters = null;
    this.sourceType = null;
    this.sourceIds = null;
    this.sourceCommunication = null;
    if (!args) {
      return;
    }
    // `!= null` matches exactly the null and undefined cases.
    if (args.queryTerms != null) {
      this.queryTerms = Thrift.copyList(args.queryTerms, [null]);
    }
    if (args.maximumTokens != null) {
      this.maximumTokens = args.maximumTokens;
    }
    if (args.maximumCharacters != null) {
      this.maximumCharacters = args.maximumCharacters;
    }
    if (args.sourceType != null) {
      this.sourceType = args.sourceType;
    }
    if (args.sourceIds != null) {
      this.sourceIds = Thrift.copyList(args.sourceIds, [uuid_ttypes.UUID]);
    }
    if (args.sourceCommunication != null) {
      this.sourceCommunication = new communication_ttypes.Communication(args.sourceCommunication);
    }
  }

  /**
   * Populate this struct from a Thrift protocol reader.
   * Fields with an unknown id or an unexpected wire type are skipped.
   *
   * @param {Object} input - Thrift protocol to read from
   */
  read (input) {
    input.readStructBegin();
    for (;;) {
      const fieldHeader = input.readFieldBegin();
      const fieldType = fieldHeader.ftype;
      const fieldId = fieldHeader.fid;
      // Loose comparison kept on purpose: it mirrors the generated code.
      if (fieldType == Thrift.Type.STOP) {
        break;
      }
      if (fieldId === 1 && fieldType == Thrift.Type.LIST) {
        this.queryTerms = [];
        const listHeader = input.readListBegin();
        const termCount = listHeader.size || 0;
        for (let index = 0; index < termCount; index += 1) {
          this.queryTerms.push(input.readString());
        }
        input.readListEnd();
      } else if (fieldId === 2 && fieldType == Thrift.Type.I32) {
        this.maximumTokens = input.readI32();
      } else if (fieldId === 3 && fieldType == Thrift.Type.I32) {
        this.maximumCharacters = input.readI32();
      } else if (fieldId === 4 && fieldType == Thrift.Type.I32) {
        this.sourceType = input.readI32();
      } else if (fieldId === 5 && fieldType == Thrift.Type.LIST) {
        this.sourceIds = [];
        const listHeader = input.readListBegin();
        const idCount = listHeader.size || 0;
        for (let index = 0; index < idCount; index += 1) {
          const sourceId = new uuid_ttypes.UUID();
          sourceId.read(input);
          this.sourceIds.push(sourceId);
        }
        input.readListEnd();
      } else if (fieldId === 6 && fieldType == Thrift.Type.STRUCT) {
        this.sourceCommunication = new communication_ttypes.Communication();
        this.sourceCommunication.read(input);
      } else {
        input.skip(fieldType);
      }
      input.readFieldEnd();
    }
    input.readStructEnd();
  }

  /**
   * Serialize this struct to a Thrift protocol writer.
   * Fields that are null or undefined are omitted.
   *
   * @param {Object} output - Thrift protocol to write to
   */
  write (output) {
    output.writeStructBegin('SummarizationRequest');
    if (this.queryTerms != null) {
      output.writeFieldBegin('queryTerms', Thrift.Type.LIST, 1);
      output.writeListBegin(Thrift.Type.STRING, this.queryTerms.length);
      // for...in with an own-property guard mirrors the generated iteration.
      for (const key in this.queryTerms) {
        if (this.queryTerms.hasOwnProperty(key)) {
          output.writeString(this.queryTerms[key]);
        }
      }
      output.writeListEnd();
      output.writeFieldEnd();
    }
    if (this.maximumTokens != null) {
      output.writeFieldBegin('maximumTokens', Thrift.Type.I32, 2);
      output.writeI32(this.maximumTokens);
      output.writeFieldEnd();
    }
    if (this.maximumCharacters != null) {
      output.writeFieldBegin('maximumCharacters', Thrift.Type.I32, 3);
      output.writeI32(this.maximumCharacters);
      output.writeFieldEnd();
    }
    if (this.sourceType != null) {
      output.writeFieldBegin('sourceType', Thrift.Type.I32, 4);
      output.writeI32(this.sourceType);
      output.writeFieldEnd();
    }
    if (this.sourceIds != null) {
      output.writeFieldBegin('sourceIds', Thrift.Type.LIST, 5);
      output.writeListBegin(Thrift.Type.STRUCT, this.sourceIds.length);
      for (const key in this.sourceIds) {
        if (this.sourceIds.hasOwnProperty(key)) {
          this.sourceIds[key].write(output);
        }
      }
      output.writeListEnd();
      output.writeFieldEnd();
    }
    if (this.sourceCommunication != null) {
      output.writeFieldBegin('sourceCommunication', Thrift.Type.STRUCT, 6);
      this.sourceCommunication.write(output);
      output.writeFieldEnd();
    }
    output.writeFieldStop();
    output.writeStructEnd();
  }

};
|
185
|
+
/**
 * Thrift struct SummaryConcept.
 *
 * Fields (Thrift field id in brackets):
 *   [1] tokens     - structure_ttypes.TokenRefSequence struct, default null
 *   [2] concept    - string, default null
 *   [3] confidence - double, default 1
 *   [4] utility    - double, default 1
 */
const SummaryConcept = module.exports.SummaryConcept = class {
  /**
   * @param {Object} [args] - Optional initial field values. Fields that are
   *     null or undefined in args keep their defaults; tokens is wrapped in a
   *     new TokenRefSequence.
   */
  constructor(args) {
    this.tokens = null;
    this.concept = null;
    this.confidence = 1;
    this.utility = 1;
    if (!args) {
      return;
    }
    // `!= null` matches exactly the null and undefined cases.
    if (args.tokens != null) {
      this.tokens = new structure_ttypes.TokenRefSequence(args.tokens);
    }
    if (args.concept != null) {
      this.concept = args.concept;
    }
    if (args.confidence != null) {
      this.confidence = args.confidence;
    }
    if (args.utility != null) {
      this.utility = args.utility;
    }
  }

  /**
   * Populate this struct from a Thrift protocol reader.
   * Fields with an unknown id or an unexpected wire type are skipped.
   *
   * @param {Object} input - Thrift protocol to read from
   */
  read (input) {
    input.readStructBegin();
    for (;;) {
      const fieldHeader = input.readFieldBegin();
      const fieldType = fieldHeader.ftype;
      const fieldId = fieldHeader.fid;
      // Loose comparison kept on purpose: it mirrors the generated code.
      if (fieldType == Thrift.Type.STOP) {
        break;
      }
      if (fieldId === 1 && fieldType == Thrift.Type.STRUCT) {
        this.tokens = new structure_ttypes.TokenRefSequence();
        this.tokens.read(input);
      } else if (fieldId === 2 && fieldType == Thrift.Type.STRING) {
        this.concept = input.readString();
      } else if (fieldId === 3 && fieldType == Thrift.Type.DOUBLE) {
        this.confidence = input.readDouble();
      } else if (fieldId === 4 && fieldType == Thrift.Type.DOUBLE) {
        this.utility = input.readDouble();
      } else {
        input.skip(fieldType);
      }
      input.readFieldEnd();
    }
    input.readStructEnd();
  }

  /**
   * Serialize this struct to a Thrift protocol writer.
   * Fields that are null or undefined are omitted.
   *
   * @param {Object} output - Thrift protocol to write to
   */
  write (output) {
    output.writeStructBegin('SummaryConcept');
    if (this.tokens != null) {
      output.writeFieldBegin('tokens', Thrift.Type.STRUCT, 1);
      this.tokens.write(output);
      output.writeFieldEnd();
    }
    if (this.concept != null) {
      output.writeFieldBegin('concept', Thrift.Type.STRING, 2);
      output.writeString(this.concept);
      output.writeFieldEnd();
    }
    if (this.confidence != null) {
      output.writeFieldBegin('confidence', Thrift.Type.DOUBLE, 3);
      output.writeDouble(this.confidence);
      output.writeFieldEnd();
    }
    if (this.utility != null) {
      output.writeFieldBegin('utility', Thrift.Type.DOUBLE, 4);
      output.writeDouble(this.utility);
      output.writeFieldEnd();
    }
    output.writeFieldStop();
    output.writeStructEnd();
  }

};
|
283
|
+
/**
 * Thrift struct Summary.
 *
 * Fields (Thrift field id in brackets), both defaulting to null:
 *   [1] summaryCommunication - communication_ttypes.Communication struct
 *   [2] concepts             - list of SummaryConcept structs
 */
const Summary = module.exports.Summary = class {
  /**
   * @param {Object} [args] - Optional initial field values. Fields that are
   *     null or undefined in args are left at null; summaryCommunication is
   *     wrapped in a new Communication and concepts is copied with Thrift.copyList.
   */
  constructor(args) {
    this.summaryCommunication = null;
    this.concepts = null;
    if (!args) {
      return;
    }
    // `!= null` matches exactly the null and undefined cases.
    if (args.summaryCommunication != null) {
      this.summaryCommunication = new communication_ttypes.Communication(args.summaryCommunication);
    }
    if (args.concepts != null) {
      this.concepts = Thrift.copyList(args.concepts, [ttypes.SummaryConcept]);
    }
  }

  /**
   * Populate this struct from a Thrift protocol reader.
   * Fields with an unknown id or an unexpected wire type are skipped.
   *
   * @param {Object} input - Thrift protocol to read from
   */
  read (input) {
    input.readStructBegin();
    for (;;) {
      const fieldHeader = input.readFieldBegin();
      const fieldType = fieldHeader.ftype;
      const fieldId = fieldHeader.fid;
      // Loose comparison kept on purpose: it mirrors the generated code.
      if (fieldType == Thrift.Type.STOP) {
        break;
      }
      if (fieldId === 1 && fieldType == Thrift.Type.STRUCT) {
        this.summaryCommunication = new communication_ttypes.Communication();
        this.summaryCommunication.read(input);
      } else if (fieldId === 2 && fieldType == Thrift.Type.LIST) {
        this.concepts = [];
        const listHeader = input.readListBegin();
        const conceptCount = listHeader.size || 0;
        for (let index = 0; index < conceptCount; index += 1) {
          const concept = new ttypes.SummaryConcept();
          concept.read(input);
          this.concepts.push(concept);
        }
        input.readListEnd();
      } else {
        input.skip(fieldType);
      }
      input.readFieldEnd();
    }
    input.readStructEnd();
  }

  /**
   * Serialize this struct to a Thrift protocol writer.
   * Fields that are null or undefined are omitted.
   *
   * @param {Object} output - Thrift protocol to write to
   */
  write (output) {
    output.writeStructBegin('Summary');
    if (this.summaryCommunication != null) {
      output.writeFieldBegin('summaryCommunication', Thrift.Type.STRUCT, 1);
      this.summaryCommunication.write(output);
      output.writeFieldEnd();
    }
    if (this.concepts != null) {
      output.writeFieldBegin('concepts', Thrift.Type.LIST, 2);
      output.writeListBegin(Thrift.Type.STRUCT, this.concepts.length);
      // for...in with an own-property guard mirrors the generated iteration.
      for (const key in this.concepts) {
        if (this.concepts.hasOwnProperty(key)) {
          this.concepts[key].write(output);
        }
      }
      output.writeListEnd();
      output.writeFieldEnd();
    }
    output.writeFieldStop();
    output.writeStructEnd();
  }

};
|
365
|
+
/**
 * Thrift struct SummarizationCapability.
 *
 * Fields (Thrift field id in brackets), both defaulting to null:
 *   [1] type - i32
 *   [2] lang - string
 */
const SummarizationCapability = module.exports.SummarizationCapability = class {
  /**
   * @param {Object} [args] - Optional initial field values. When args is
   *     given, both type and lang must be non-null.
   * @throws {Thrift.TProtocolException} If args is given but type or lang is
   *     null or undefined.
   */
  constructor(args) {
    this.type = null;
    this.lang = null;
    if (!args) {
      return;
    }
    // `== null` matches exactly the null and undefined cases.
    if (args.type == null) {
      throw new Thrift.TProtocolException(Thrift.TProtocolExceptionType.UNKNOWN, 'Required field type is unset!');
    }
    this.type = args.type;
    if (args.lang == null) {
      throw new Thrift.TProtocolException(Thrift.TProtocolExceptionType.UNKNOWN, 'Required field lang is unset!');
    }
    this.lang = args.lang;
  }

  /**
   * Populate this struct from a Thrift protocol reader.
   * Fields with an unknown id or an unexpected wire type are skipped.
   *
   * @param {Object} input - Thrift protocol to read from
   */
  read (input) {
    input.readStructBegin();
    for (;;) {
      const fieldHeader = input.readFieldBegin();
      const fieldType = fieldHeader.ftype;
      const fieldId = fieldHeader.fid;
      // Loose comparison kept on purpose: it mirrors the generated code.
      if (fieldType == Thrift.Type.STOP) {
        break;
      }
      if (fieldId === 1 && fieldType == Thrift.Type.I32) {
        this.type = input.readI32();
      } else if (fieldId === 2 && fieldType == Thrift.Type.STRING) {
        this.lang = input.readString();
      } else {
        input.skip(fieldType);
      }
      input.readFieldEnd();
    }
    input.readStructEnd();
  }

  /**
   * Serialize this struct to a Thrift protocol writer.
   * Fields that are null or undefined are omitted.
   *
   * @param {Object} output - Thrift protocol to write to
   */
  write (output) {
    output.writeStructBegin('SummarizationCapability');
    if (this.type != null) {
      output.writeFieldBegin('type', Thrift.Type.I32, 1);
      output.writeI32(this.type);
      output.writeFieldEnd();
    }
    if (this.lang != null) {
      output.writeFieldBegin('lang', Thrift.Type.STRING, 2);
      output.writeString(this.lang);
      output.writeFieldEnd();
    }
    output.writeFieldStop();
    output.writeStructEnd();
  }

};
|
@@ -0,0 +1,33 @@
|
|
1
|
+
/**
|
2
|
+
* @class Tokenization
|
3
|
+
* @classdesc concrete.js extensions to the Tokenization class
|
4
|
+
*/
|
5
|
+
const Tokenization = module.exports = require('./structure_types').Tokenization;
|
6
|
+
|
7
|
+
/**
 * Append a TokenTagging to this Tokenization's tokenTaggingList,
 * creating the list first when it is not set.
 *
 * @param {TokenTagging} tokenTagging - The tagging to append
 */
Tokenization.prototype.addTokenTagging = function(tokenTagging) {
  const hasList = Boolean(this.tokenTaggingList);
  if (hasList === false) {
    this.tokenTaggingList = [];
  }
  this.tokenTaggingList.push(tokenTagging);
};
|
17
|
+
|
18
|
+
/**
 * Get all TokenTaggings with the specified taggingType
 *
 * Only the elements of tokenTaggingList are examined; an unset
 * tokenTaggingList is treated as an empty list.
 *
 * @param {String} taggingType - A string specifying a TokenTagging.taggingType
 * @returns {Array} A (possibly empty) array of TokenTagging objects
 */
Tokenization.prototype.getTokenTaggingsOfType = function(taggingType) {
  const tokenTaggingList = this.tokenTaggingList || [];

  // filter() visits array elements only. The previous for...in loop also
  // walked non-index and inherited enumerable properties of the array.
  return tokenTaggingList.filter(function(tokenTagging) {
    return tokenTagging.taggingType === taggingType;
  });
};
|
@@ -0,0 +1,241 @@
|
|
1
|
+
/**
|
2
|
+
* @class TokenTagging
|
3
|
+
* @classdesc concrete.js extensions to the TokenTagging class
|
4
|
+
*/
|
5
|
+
const concrete = {};
|
6
|
+
concrete.structure = require('./structure_types');
|
7
|
+
const TokenTagging = module.exports = concrete.structure.TokenTagging;
|
8
|
+
const TaggedToken = concrete.structure.TaggedToken;
|
9
|
+
concrete.metadata = require('./metadata_types');
|
10
|
+
const AnnotationMetadata = concrete.metadata.AnnotationMetadata;
|
11
|
+
|
12
|
+
const jQuery = {extend: function(){throw new Error("not implemented");}};
|
13
|
+
const $ = jQuery;
|
14
|
+
|
15
|
+
/**
 * Create a valid TokenTagging with required fields AnnotationMetadata and UUID
 *
 * Example usage:
 *
 *     tt = TokenTagging.create({taggingType: 'NER'}, {tool: 'HIT'})
 *
 * Defaults: metadata.timestamp is the current time in whole seconds,
 * metadata.tool is 'concrete.js - TokenTagging.create()', taggedTokenList
 * is an empty array and taggingType is the empty string.
 *
 * @param {Object} options - Override default TokenTagging fields (except metadata)
 * @param {Object} metadataOptions - Override default tokenTagging.metadata fields
 * @returns {TokenTagging} The new TokenTagging
 */
TokenTagging.create = function(options, metadataOptions) {
  // Copy own enumerable properties of source onto target, skipping undefined
  // values. This replaces the $.extend() calls, which cannot run here because
  // jQuery is stubbed to throw in this module.
  const assignDefined = function(target, source) {
    Object.keys(source || {}).forEach(function(key) {
      if (source[key] !== undefined) {
        target[key] = source[key];
      }
    });
    return target;
  };

  // concrete.util is never assigned in this module, so the previous
  // concrete.util.generateUUID() call always threw a TypeError.
  // NOTE(review): assumes ./util exports generateUUID() - confirm.
  const util = concrete.util || require('./util');

  const tokenTagging = new TokenTagging();
  tokenTagging.metadata = new AnnotationMetadata();
  tokenTagging.metadata.timestamp = Math.floor(Date.now() / 1000);
  tokenTagging.metadata.tool = 'concrete.js - TokenTagging.create()';
  tokenTagging.taggedTokenList = [];
  tokenTagging.taggingType = '';
  tokenTagging.uuid = util.generateUUID();

  assignDefined(tokenTagging, options);
  // Merge into a fresh AnnotationMetadata so a metadata object supplied via
  // options is never mutated.
  tokenTagging.metadata = assignDefined(
    assignDefined(new AnnotationMetadata(), tokenTagging.metadata),
    metadataOptions
  );
  return tokenTagging;
};
|
38
|
+
|
39
|
+
/**
 * Get the BIO value for the TaggedToken at tokenIndex.
 *
 * The BIO value is the first character of the token's tag, provided that
 * character is 'B', 'I' or 'O'.
 *
 * @param {Integer} tokenIndex
 * @returns {String|null} - 'B', 'I', 'O', or null when there is no such
 *                          TaggedToken, its tag is empty, or the tag starts
 *                          with any other character
 */
TokenTagging.prototype.bioGetBIOValue = function(tokenIndex) {
  const taggedToken = this.getTaggedTokenWithTokenIndex(tokenIndex);
  if (!taggedToken || !taggedToken.tag) {
    return null;
  }
  const leadingChar = taggedToken.tag.charAt(0);
  switch (leadingChar) {
    case 'B':
    case 'I':
    case 'O':
      return leadingChar;
    default:
      return null;
  }
};
|
55
|
+
|
56
|
+
/**
 * Get tag value (stripped of BIO tag and separator) for TaggedToken at tokenIndex
 *
 * The number of characters stripped is one (the B/I/O character) plus the
 * length of the separator returned by bioGetTagSeparator(). Calling this
 * therefore sets the separator to its default if it was unset.
 *
 * @param {Integer} tokenIndex
 * @returns {String|null} - The tag text after the BIO prefix (the empty
 *                          string for a bare 'O' tag), or null when there is
 *                          no TaggedToken at tokenIndex or its tag is empty
 */
TokenTagging.prototype.bioGetTagValue = function(tokenIndex) {
  const taggedToken = this.getTaggedTokenWithTokenIndex(tokenIndex);
  if (!taggedToken || !taggedToken.tag) {
    return null;
  }
  // Tags are written as <B|I> + separator + text, so skip the real separator
  // length rather than assuming a single character.
  const separator = String(this.bioGetTagSeparator());
  return taggedToken.tag.substring(1 + separator.length);
};
|
70
|
+
|
71
|
+
/**
 * Return the separator used between the B/I/O character and the rest of
 * the tag in BIO TokenTaggings.
 *
 * When no separator has been set yet, it is initialized to '-' as a side
 * effect of this call.
 *
 * @returns {String} - Separator character for BIO TokenTaggings
 */
TokenTagging.prototype.bioGetTagSeparator = function() {
  const isUnset = typeof this.bioTagSeparator === 'undefined';
  if (isUnset) {
    this.bioTagSeparator = '-';
  }
  return this.bioTagSeparator;
};
|
85
|
+
|
86
|
+
/**
 * Returns token index of 'B' tag for the (possibly multi-token) 'BI'
 * tagging at the specified tokenIndex.
 *
 * If the tag at tokenIndex is a 'B' tag, return tokenIndex.  If the
 * tag at tokenIndex is an 'I' tag, walk backwards over the preceding
 * 'I' tags to find the index of the 'B' tag for this 'I' tag.
 *
 * @param {Number} tokenIndex - Index of a token tagged 'B' or 'I'
 * @returns {Number} - Token index of "B" tag
 * @throws {TypeError} Thrown if the tag at tokenIndex is not a 'B' or
 *                     'I' tag.  Also thrown if the tag at tokenIndex
 *                     is a valid 'I' tag, but not part of a valid 'BI*'
 *                     multi-token tagging.
 */
TokenTagging.prototype.bioGetTokenIndexForB = function(tokenIndex) {
  const startValue = this.bioGetBIOValue(tokenIndex);
  if (startValue !== 'B' && startValue !== 'I') {
    throw new TypeError("TokenTagging.bioGetTokenIndexForB expected a 'B' or 'I' tag at tokenIndex " + tokenIndex);
  }

  let bTokenIndex = tokenIndex;
  while (this.bioGetBIOValue(bTokenIndex) === 'I') {
    bTokenIndex -= 1;
  }
  if (this.bioGetBIOValue(bTokenIndex) !== 'B') {
    // Report the tag text itself; concatenating the TaggedToken object
    // would print "[object Object]".
    const taggedToken = this.getTaggedTokenWithTokenIndex(bTokenIndex);
    const actualTag = taggedToken ? taggedToken.tag : null;
    throw new TypeError("TokenTagging.bioGetTokenIndexForB expected a 'B' tag at tokenIndex " +
                        bTokenIndex + ", but tag was '" + actualTag + "'");
  }
  return bTokenIndex;
};
|
117
|
+
|
118
|
+
/**
 * Set BIO TaggedToken tag
 *
 * - 'B': the tag becomes 'B' + separator + tagText.
 * - 'I': the tag becomes 'I' + separator + the tag text of the previous
 *        token; the tagText argument is ignored.
 * - 'O': the tag becomes 'O'.
 *
 * Afterwards, if the next token carries an 'I' tag it is repaired so the
 * sequence stays consistent: after an 'O' it is turned into a 'B' (keeping
 * its own text); otherwise it and the 'I' tokens after it are rewritten
 * to carry the text now in effect at tokenIndex.
 *
 * @param {String} bioValue - Should be 'B', 'I' or 'O'
 * @param {String} tagText
 * @param {Number} tokenIndex
 * @throws {TypeError} Thrown if bioValue is not 'B'|'I'|'O'.  Also thrown
 *                     if bioValue is 'I', but not part of a valid 'BI*'
 *                     multi-token tagging.
 */
TokenTagging.prototype.bioSetTaggedTokenTag = function(bioValue, tagText, tokenIndex) {
  if (bioValue !== 'B' && bioValue !== 'I' && bioValue !== 'O') {
    throw new TypeError("TokenTagging.bioSetTaggedTokenTag() expected bioValue to be 'B', 'I' or 'O', " +
                        "but instead it was '" + bioValue + "'");
  }

  // Tag text actually in effect at tokenIndex once the tag is written.
  // For 'I' it is inherited from the previous token, not taken from tagText.
  let effectiveTagText = tagText;
  if (bioValue === 'B') {
    this.setTaggedTokenTag(bioValue + this.bioGetTagSeparator() + tagText, tokenIndex);
  }
  else if (bioValue === 'I') {
    const bioPreviousValue = this.bioGetBIOValue(tokenIndex - 1);
    if (bioPreviousValue !== 'B' && bioPreviousValue !== 'I') {
      throw new TypeError("TokenTagging.bioSetTaggedTokenTag() encountered inconsistent BIO tagging " +
                          "at tokenIndex " + (tokenIndex-1));
    }
    effectiveTagText = this.bioGetTagValue(tokenIndex - 1);
    this.setTaggedTokenTag(bioValue + this.bioGetTagSeparator() + effectiveTagText, tokenIndex);
  }
  else {
    this.setTaggedTokenTag('O', tokenIndex);
  }

  // Only a following 'I' token can have been made inconsistent.
  if (this.bioGetBIOValue(tokenIndex + 1) !== 'I') {
    return;
  }
  const tagTextNext = this.bioGetTagValue(tokenIndex + 1);
  if (bioValue === 'O') {
    // An 'I' may not follow an 'O': promote it to the start of its own span.
    this.bioSetTaggedTokenTag('B', tagTextNext, tokenIndex + 1);
  }
  else if (effectiveTagText !== tagTextNext) {
    // Update tagText for all following 'I' tokens. Comparing against the
    // effective text (not the caller's tagText) keeps the chain consistent
    // when bioValue is 'I' and tagText was ignored above.
    this.bioSetTaggedTokenTag('I', effectiveTagText, tokenIndex + 1);
  }
};
|
167
|
+
|
168
|
+
/**
 * For BIO TokenTaggings, store the separator placed between the B/I/O
 * character and the rest of the tag. The value is stored as given.
 *
 * @param {String} separator - String used as separator character
 */
TokenTagging.prototype.bioSetTagSeparator = function(separator) {
  const chosenSeparator = separator;
  this.bioTagSeparator = chosenSeparator;
};
|
177
|
+
|
178
|
+
/**
 * Return a deep copy of this TokenTagging's taggedTokenList.
 *
 * Each element is copied into a new TaggedToken. Own enumerable properties
 * are copied; nested arrays and plain objects are copied recursively, while
 * any other object value is shared by reference.
 *
 * @returns {TaggedToken[]}
 */
TokenTagging.prototype.deepCopyTaggedTokenList = function() {
  // True for objects created by a literal or Object.create(null).
  const isPlainObject = function(value) {
    if (value === null || typeof value !== 'object') {
      return false;
    }
    const proto = Object.getPrototypeOf(value);
    return proto === Object.prototype || proto === null;
  };

  // Recursively copy arrays and plain objects; return anything else as is.
  const deepCopy = function(value) {
    if (Array.isArray(value)) {
      return value.map(function(element) {
        return deepCopy(element);
      });
    }
    if (isPlainObject(value)) {
      const copy = {};
      Object.keys(value).forEach(function(key) {
        copy[key] = deepCopy(value[key]);
      });
      return copy;
    }
    return value;
  };

  // Implemented locally: jQuery is stubbed to throw in this module, so the
  // previous jQuery.extend(true, ...) call failed for any non-empty list.
  return this.taggedTokenList.map(function(source) {
    const taggedToken = new TaggedToken();
    Object.keys(source).forEach(function(key) {
      taggedToken[key] = deepCopy(source[key]);
    });
    return taggedToken;
  });
};
|
191
|
+
|
192
|
+
/**
 * Find the first TaggedToken in taggedTokenList whose tokenIndex is
 * strictly equal to the given tokenIndex.
 *
 * @param {Number} tokenIndex
 * @returns {TaggedToken|null} The matching TaggedToken, or null if none matches
 */
TokenTagging.prototype.getTaggedTokenWithTokenIndex = function(tokenIndex) {
  for (const candidate of this.taggedTokenList) {
    if (candidate.tokenIndex === tokenIndex) {
      return candidate;
    }
  }
  return null;
};
|
206
|
+
|
207
|
+
/**
 * Replace taggedTokenList with one TaggedToken per token of the given
 * Tokenization, all carrying the same tag. Token indices run from 0 to
 * tokenization.tokenList.tokenList.length - 1.
 *
 * @param {Tokenization} tokenization - Used to determine # of TokenTags
 * @param {String} tagText - Value for each TaggedToken's "tag" field
 */
TokenTagging.prototype.setAllTaggedTokenTags = function(tokenization, tagText) {
  // The previous contents are dropped before the tokenization is inspected.
  this.taggedTokenList = [];

  let tokenIndex = 0;
  while (tokenIndex < tokenization.tokenList.tokenList.length) {
    const freshToken = new TaggedToken();
    freshToken.tag = tagText;
    freshToken.tokenIndex = tokenIndex;
    this.taggedTokenList.push(freshToken);
    tokenIndex += 1;
  }
};
|
224
|
+
|
225
|
+
/**
 * Sets the tag of the TaggedToken with the specified tokenIndex.
 * If a TaggedToken with the specified tokenIndex does not exist,
 * then one is created and appended to taggedTokenList.
 *
 * @param {String} tagText
 * @param {Number} tokenIndex
 */
TokenTagging.prototype.setTaggedTokenTag = function(tagText, tokenIndex) {
  const existing = this.getTaggedTokenWithTokenIndex(tokenIndex);
  if (existing) {
    existing.tag = tagText;
    return;
  }

  const created = new TaggedToken();
  created.tokenIndex = tokenIndex;
  this.taggedTokenList.push(created);
  created.tag = tagText;
};
|