@ccmaymay/concrete 4.15.0 → 4.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ActiveLearnerClientService.js +249 -249
- package/ActiveLearnerServerService.js +642 -642
- package/AnnotateCommunicationService.js +696 -696
- package/AnnotateWithContextService.js +298 -298
- package/FeedbackService.js +750 -750
- package/FetchCommunicationService.js +709 -709
- package/README.md +125 -0
- package/ResultsServerService.js +2138 -2138
- package/SearchProxyService.js +962 -962
- package/SearchService.js +685 -685
- package/Service.js +373 -373
- package/StoreCommunicationService.js +255 -255
- package/SummarizationService.js +479 -479
- package/access_types.js +168 -168
- package/annotate_types.js +26 -26
- package/audio_types.js +110 -110
- package/cluster_types.js +398 -398
- package/communication_fu.js +432 -432
- package/communication_types.js +845 -845
- package/concrete.js +64 -64
- package/context_types.js +65 -65
- package/email_types.js +477 -477
- package/entities_types.js +658 -658
- package/ex_types.js +82 -82
- package/language_types.js +123 -123
- package/learn_types.js +207 -207
- package/linking_types.js +286 -286
- package/metadata_types.js +926 -926
- package/nitf_types.js +1005 -1005
- package/package.json +26 -5
- package/results_types.js +18 -18
- package/search_types.js +661 -661
- package/services_types.js +384 -384
- package/situations_types.js +1268 -1268
- package/spans_types.js +151 -151
- package/structure_types.js +2311 -2311
- package/summarization_types.js +433 -433
- package/tokenization_fu.js +33 -33
- package/tokentagging_fu.js +241 -241
- package/twitter_types.js +1553 -1553
- package/util.js +117 -117
- package/uuid_types.js +67 -67
package/tokenization_fu.js
CHANGED
@@ -1,33 +1,33 @@
|
|
1
|
-
/**
|
2
|
-
* @class Tokenization
|
3
|
-
* @classdesc concrete.js extensions to the Tokenization class
|
4
|
-
*/
|
5
|
-
const Tokenization = module.exports = require('./structure_types').Tokenization;
|
6
|
-
|
7
|
-
/**
|
8
|
-
* Add a TokenTagging to this Tokenization
|
9
|
-
* @param {TokenTagging} tokenTagging
|
10
|
-
*/
|
11
|
-
Tokenization.prototype.addTokenTagging = function(tokenTagging) {
|
12
|
-
if (!this.tokenTaggingList) {
|
13
|
-
this.tokenTaggingList = [];
|
14
|
-
}
|
15
|
-
this.tokenTaggingList.push(tokenTagging);
|
16
|
-
};
|
17
|
-
|
18
|
-
/**
|
19
|
-
* Get all TokenTaggings with the specified taggingType
|
20
|
-
* @param {String} taggingType - A string specifying a TokenTagging.taggingType
|
21
|
-
* @returns {Array} A (possibly empty) array of TokenTagging objects
|
22
|
-
*/
|
23
|
-
Tokenization.prototype.getTokenTaggingsOfType = function(taggingType) {
|
24
|
-
var tokenTaggings = [];
|
25
|
-
|
26
|
-
for (var tokenTaggingIndex in this.tokenTaggingList) {
|
27
|
-
if (this.tokenTaggingList[tokenTaggingIndex].taggingType === taggingType) {
|
28
|
-
tokenTaggings.push(this.tokenTaggingList[tokenTaggingIndex]);
|
29
|
-
}
|
30
|
-
}
|
31
|
-
|
32
|
-
return tokenTaggings;
|
33
|
-
};
|
1
|
+
/**
|
2
|
+
* @class Tokenization
|
3
|
+
* @classdesc concrete.js extensions to the Tokenization class
|
4
|
+
*/
|
5
|
+
const Tokenization = module.exports = require('./structure_types').Tokenization;
|
6
|
+
|
7
|
+
/**
 * Append a TokenTagging to this Tokenization's tokenTaggingList.
 *
 * The list is created on first use when it is missing (or otherwise falsy).
 *
 * @param {TokenTagging} tokenTagging - The TokenTagging to append
 */
Tokenization.prototype.addTokenTagging = function(tokenTagging) {
  const existingList = this.tokenTaggingList;
  if (existingList) {
    existingList.push(tokenTagging);
    return;
  }
  this.tokenTaggingList = [tokenTagging];
};
|
17
|
+
|
18
|
+
/**
 * Get all TokenTaggings with the specified taggingType
 *
 * @param {String} taggingType - A string specifying a TokenTagging.taggingType
 * @returns {Array} A new (possibly empty) array of the matching TokenTagging
 *                  objects, in the order they appear in tokenTaggingList
 */
Tokenization.prototype.getTokenTaggingsOfType = function(taggingType) {
  // tokenTaggingList is only created on demand (see addTokenTagging), so a
  // missing list simply means "no taggings".
  if (!Array.isArray(this.tokenTaggingList)) {
    return [];
  }

  // filter() visits array elements only; the previous for...in loop also
  // walked any extra enumerable properties attached to the array.
  return this.tokenTaggingList.filter(function(tokenTagging) {
    return tokenTagging.taggingType === taggingType;
  });
};
|
package/tokentagging_fu.js
CHANGED
@@ -1,241 +1,241 @@
|
|
1
|
-
/**
|
2
|
-
* @class TokenTagging
|
3
|
-
* @classdesc concrete.js extensions to the TokenTagging class
|
4
|
-
*/
|
5
|
-
const concrete = {};
|
6
|
-
concrete.structure = require('./structure_types');
|
7
|
-
const TokenTagging = module.exports = concrete.structure.TokenTagging;
|
8
|
-
const TaggedToken = concrete.structure.TaggedToken;
|
9
|
-
concrete.metadata = require('./metadata_types');
|
10
|
-
const AnnotationMetadata = concrete.metadata.AnnotationMetadata;
|
11
|
-
|
12
|
-
const jQuery = {extend: function(){throw new Error("not implemented");}};
|
13
|
-
const $ = jQuery;
|
14
|
-
|
15
|
-
/**
|
16
|
-
* Create a valid TokenTagging with required fields AnnotationMetadata and UUID
|
17
|
-
*
|
18
|
-
* Example usage:
|
19
|
-
*
|
20
|
-
* tt = TokenTagging.create({taggingType: 'NER'}, {tool: 'HIT'})
|
21
|
-
*
|
22
|
-
* @param {Object} options - Override default TokenTagging fields (except metadata)
|
23
|
-
* @param {Object} metadataOptions - Override default tokenTagging.metadata fields
|
24
|
-
*/
|
25
|
-
TokenTagging.create = function(options, metadataOptions) {
|
26
|
-
var tokenTagging = new TokenTagging();
|
27
|
-
tokenTagging.metadata = new AnnotationMetadata();
|
28
|
-
tokenTagging.metadata.timestamp = Math.floor(Date.now()/1000);
|
29
|
-
tokenTagging.metadata.tool = 'concrete.js - TokenTagging.create()';
|
30
|
-
tokenTagging.taggedTokenList = [];
|
31
|
-
tokenTagging.taggingType = '';
|
32
|
-
tokenTagging.uuid = concrete.util.generateUUID();
|
33
|
-
|
34
|
-
tokenTagging = $.extend({}, tokenTagging, options);
|
35
|
-
tokenTagging.metadata = $.extend({}, tokenTagging.metadata, metadataOptions);
|
36
|
-
return tokenTagging;
|
37
|
-
};
|
38
|
-
|
39
|
-
/**
|
40
|
-
* Get BIO value for TaggedToken at tokenIndex
|
41
|
-
*
|
42
|
-
* @param {Integer] tokenIndex
|
43
|
-
* @returns {String|null} - 'B', 'I', 'O' or null
|
44
|
-
*/
|
45
|
-
TokenTagging.prototype.bioGetBIOValue = function(tokenIndex) {
|
46
|
-
var taggedToken = this.getTaggedTokenWithTokenIndex(tokenIndex);
|
47
|
-
if (taggedToken && taggedToken.tag) {
|
48
|
-
var firstChar = taggedToken.tag.charAt(0);
|
49
|
-
if (firstChar === 'B' || firstChar === 'I' || firstChar === 'O' ) {
|
50
|
-
return firstChar;
|
51
|
-
}
|
52
|
-
}
|
53
|
-
return null;
|
54
|
-
};
|
55
|
-
|
56
|
-
/**
|
57
|
-
* Get tag value (stripped of BIO tag and separator) for TaggedToken at tokenIndex
|
58
|
-
*
|
59
|
-
* @param {Integer] tokenIndex
|
60
|
-
* @returns {String|null} - 'B', 'I', 'O' or null
|
61
|
-
*
|
62
|
-
*/
|
63
|
-
TokenTagging.prototype.bioGetTagValue = function(tokenIndex) {
|
64
|
-
var taggedToken = this.getTaggedTokenWithTokenIndex(tokenIndex);
|
65
|
-
if (taggedToken && taggedToken.tag) {
|
66
|
-
return taggedToken.tag.substring(2);
|
67
|
-
}
|
68
|
-
return null;
|
69
|
-
};
|
70
|
-
|
71
|
-
/**
|
72
|
-
* Returns separator character for BIO TokenTaggings.
|
73
|
-
*
|
74
|
-
* If the separator character had not been set before this function was called,
|
75
|
-
* the separator character will be set to '-'.
|
76
|
-
*
|
77
|
-
* @returns {String} - Separator character for BIO TokenTaggings
|
78
|
-
*/
|
79
|
-
TokenTagging.prototype.bioGetTagSeparator = function() {
|
80
|
-
if (this.bioTagSeparator === undefined) {
|
81
|
-
this.bioTagSeparator = '-';
|
82
|
-
}
|
83
|
-
return this.bioTagSeparator;
|
84
|
-
};
|
85
|
-
|
86
|
-
/**
|
87
|
-
* Returns token index of 'B' tag for the (possibly multi-token) 'BI'
|
88
|
-
* tagging at the specified tokenIndex.
|
89
|
-
*
|
90
|
-
* If the tag at tokenIndex is a 'B' tag, return tokenIndex. If the
|
91
|
-
* tag at tokenIndex is an 'I' tag, find the index of the 'B' tag for
|
92
|
-
* this 'I' tag.
|
93
|
-
*
|
94
|
-
* @param {Number} tokenIndex -
|
95
|
-
* @returns {Number} - Token index of "B" tag
|
96
|
-
* @throws {TypeError} Thrown if the tag at TokenIndex is not a 'B' or
|
97
|
-
* 'I' tag. Also thrown if the tag at TokenIndex
|
98
|
-
* is a valid 'I' tag, but not part of a valid 'BI*'
|
99
|
-
* multi-token tagging.
|
100
|
-
*/
|
101
|
-
TokenTagging.prototype.bioGetTokenIndexForB = function(tokenIndex) {
|
102
|
-
if (this.bioGetBIOValue(tokenIndex) !== 'B' && this.bioGetBIOValue(tokenIndex) !== 'I') {
|
103
|
-
throw new TypeError("TokenTagging.getBIOTokenIndexForB expected a 'B' or 'I' tag at tokenIndex " + tokenIndex);
|
104
|
-
}
|
105
|
-
|
106
|
-
var bTokenIndex = tokenIndex;
|
107
|
-
while (this.bioGetBIOValue(bTokenIndex) === 'I') {
|
108
|
-
bTokenIndex -= 1;
|
109
|
-
}
|
110
|
-
if (this.bioGetBIOValue(bTokenIndex) !== 'B') {
|
111
|
-
throw new TypeError("TokenTagging.getBIOTokenIndex expected a 'B' tag at tokenIndex " +
|
112
|
-
bTokenIndex + ', but tag was "' +
|
113
|
-
this.getTaggedTokenWithTokenIndex(bTokenIndex) + "'");
|
114
|
-
}
|
115
|
-
return bTokenIndex;
|
116
|
-
};
|
117
|
-
|
118
|
-
/**
|
119
|
-
* Set BIO TaggedToken tag
|
120
|
-
*
|
121
|
-
* @param {String} bioValue - Should be 'B', 'I' or 'O'
|
122
|
-
* @param {String} tagText
|
123
|
-
* @param {Number} tokenIndex
|
124
|
-
* @throws {TypeError} Thrown if bioValue is not 'B'|'I'|'O'. Also thrown
|
125
|
-
* if bioValue is 'I', but not part of a valid 'BI*'
|
126
|
-
* multi-token tagging.
|
127
|
-
*/
|
128
|
-
TokenTagging.prototype.bioSetTaggedTokenTag = function(bioValue, tagText, tokenIndex) {
|
129
|
-
if (bioValue !== 'B' && bioValue !== 'I' && bioValue !== 'O') {
|
130
|
-
throw new TypeError("TokenTagging.bioSetTaggedTokenTag() expected bioValue to be 'B', 'I' or 'O', " +
|
131
|
-
"but instead it was '" + bioValue + "'");
|
132
|
-
}
|
133
|
-
|
134
|
-
var bioTagText;
|
135
|
-
if (bioValue === 'B') {
|
136
|
-
this.setTaggedTokenTag(bioValue + this.bioGetTagSeparator() + tagText, tokenIndex);
|
137
|
-
}
|
138
|
-
else if (bioValue === 'I') {
|
139
|
-
var bioPreviousValue = this.bioGetBIOValue(tokenIndex-1);
|
140
|
-
if (bioPreviousValue === 'B' || bioPreviousValue === 'I') {
|
141
|
-
// Get tag value from previous tag, ignore 'tagText' passed into function
|
142
|
-
this.setTaggedTokenTag(bioValue + this.bioGetTagSeparator() + this.bioGetTagValue(tokenIndex-1), tokenIndex);
|
143
|
-
}
|
144
|
-
else {
|
145
|
-
throw new TypeError("TokenTagging.bioSetTaggedTokenTag() encountered inconsistent BIO tagging " +
|
146
|
-
"at tokenIndex " + (tokenIndex-1));
|
147
|
-
}
|
148
|
-
}
|
149
|
-
else {
|
150
|
-
this.setTaggedTokenTag('O', tokenIndex);
|
151
|
-
}
|
152
|
-
|
153
|
-
var bioValueNext = this.bioGetBIOValue(tokenIndex+1);
|
154
|
-
var tagTextNext = this.bioGetTagValue(tokenIndex+1);
|
155
|
-
if (bioValueNext === 'I') {
|
156
|
-
if (bioValue === 'O') {
|
157
|
-
this.bioSetTaggedTokenTag('B', tagTextNext, tokenIndex+1);
|
158
|
-
}
|
159
|
-
else {
|
160
|
-
if (tagText !== tagTextNext) {
|
161
|
-
// Update tagText for all following 'I' tokens
|
162
|
-
this.bioSetTaggedTokenTag('I', tagText, tokenIndex+1);
|
163
|
-
}
|
164
|
-
}
|
165
|
-
}
|
166
|
-
};
|
167
|
-
|
168
|
-
/**
|
169
|
-
* For BIO TokenTaggings, sets separator character to be used between
|
170
|
-
* B/I/O character and rest of tag
|
171
|
-
*
|
172
|
-
* @param {String} separator - String used as separator character
|
173
|
-
*/
|
174
|
-
TokenTagging.prototype.bioSetTagSeparator = function(separator) {
|
175
|
-
this.bioTagSeparator = separator;
|
176
|
-
};
|
177
|
-
|
178
|
-
/**
|
179
|
-
* Return a deep copy of this TokenTagging's taggedTokenList.
|
180
|
-
*
|
181
|
-
* @returns {TaggedToken[]}
|
182
|
-
*/
|
183
|
-
TokenTagging.prototype.deepCopyTaggedTokenList = function() {
|
184
|
-
var taggedTokenListCopy = [];
|
185
|
-
for (var i = 0; i < this.taggedTokenList.length; i++) {
|
186
|
-
var taggedToken = new TaggedToken();
|
187
|
-
taggedTokenListCopy.push(jQuery.extend(true, taggedToken, this.taggedTokenList[i]));
|
188
|
-
}
|
189
|
-
return taggedTokenListCopy;
|
190
|
-
};
|
191
|
-
|
192
|
-
/**
|
193
|
-
* Return the TaggedToken (or null) with the specified tokenIndex
|
194
|
-
*
|
195
|
-
* @param {Number} tokenIndex
|
196
|
-
* @returns {TaggedToken|null}
|
197
|
-
*/
|
198
|
-
TokenTagging.prototype.getTaggedTokenWithTokenIndex = function(tokenIndex) {
|
199
|
-
for (var i = 0; i < this.taggedTokenList.length; i++) {
|
200
|
-
if (this.taggedTokenList[i].tokenIndex === tokenIndex) {
|
201
|
-
return this.taggedTokenList[i];
|
202
|
-
}
|
203
|
-
}
|
204
|
-
return null;
|
205
|
-
};
|
206
|
-
|
207
|
-
/**
|
208
|
-
* Set taggedTokenList to a list of TaggedTokens (one per token) with identical tags
|
209
|
-
*
|
210
|
-
* @param {Tokenization} tokenization - Used to determine # of TokenTags
|
211
|
-
* @param {String} tagText - Value for each TaggedToken's "tag" field
|
212
|
-
*/
|
213
|
-
TokenTagging.prototype.setAllTaggedTokenTags = function(tokenization, tagText) {
|
214
|
-
// Discard the contents of the existing taggedTokenList
|
215
|
-
this.taggedTokenList = [];
|
216
|
-
|
217
|
-
for (var i = 0; i < tokenization.tokenList.tokenList.length; i++) {
|
218
|
-
var taggedToken = new TaggedToken();
|
219
|
-
taggedToken.tag = tagText;
|
220
|
-
taggedToken.tokenIndex = i;
|
221
|
-
this.taggedTokenList.push(taggedToken);
|
222
|
-
}
|
223
|
-
};
|
224
|
-
|
225
|
-
/**
|
226
|
-
* Sets the tag of the TaggedToken with the specified tokenIndex.
|
227
|
-
* If a TaggedToken with the specified tokenIndex does not exist,
|
228
|
-
* than it will be created.
|
229
|
-
*
|
230
|
-
* @param {String} tagText
|
231
|
-
* @param {Number} tokenIndex
|
232
|
-
*/
|
233
|
-
TokenTagging.prototype.setTaggedTokenTag = function(tagText, tokenIndex) {
|
234
|
-
var taggedToken = this.getTaggedTokenWithTokenIndex(tokenIndex);
|
235
|
-
if (!taggedToken) {
|
236
|
-
taggedToken = new TaggedToken();
|
237
|
-
taggedToken.tokenIndex = tokenIndex;
|
238
|
-
this.taggedTokenList.push(taggedToken);
|
239
|
-
}
|
240
|
-
taggedToken.tag = tagText;
|
241
|
-
};
|
1
|
+
/**
|
2
|
+
* @class TokenTagging
|
3
|
+
* @classdesc concrete.js extensions to the TokenTagging class
|
4
|
+
*/
|
5
|
+
const concrete = {};
|
6
|
+
concrete.structure = require('./structure_types');
|
7
|
+
const TokenTagging = module.exports = concrete.structure.TokenTagging;
|
8
|
+
const TaggedToken = concrete.structure.TaggedToken;
|
9
|
+
concrete.metadata = require('./metadata_types');
|
10
|
+
const AnnotationMetadata = concrete.metadata.AnnotationMetadata;
|
11
|
+
|
12
|
+
const jQuery = {extend: function(){throw new Error("not implemented");}};
|
13
|
+
const $ = jQuery;
|
14
|
+
|
15
|
+
/**
 * Create a valid TokenTagging with required fields AnnotationMetadata and UUID
 *
 * Example usage:
 *
 *     tt = TokenTagging.create({taggingType: 'NER'}, {tool: 'HIT'})
 *
 * Defaults: metadata.timestamp is the current time in whole seconds,
 * metadata.tool is 'concrete.js - TokenTagging.create()', taggedTokenList is
 * an empty array and taggingType is the empty string.
 *
 * @param {Object} [options] - Override default TokenTagging fields. Properties
 *                             whose value is undefined are ignored.
 * @param {Object} [metadataOptions] - Override default tokenTagging.metadata
 *                             fields. Properties whose value is undefined are
 *                             ignored.
 * @returns {TokenTagging} A new TokenTagging whose metadata is a new
 *                         AnnotationMetadata
 */
TokenTagging.create = function(options, metadataOptions) {
  // Shallow-copy the own, defined properties of `overrides` onto `target`.
  // This replaces the `$.extend` stub, which unconditionally throws, and
  // keeps `target`'s prototype instead of producing a plain object.
  const applyOverrides = function(target, overrides) {
    if (overrides) {
      for (const key of Object.keys(overrides)) {
        if (overrides[key] !== undefined) {
          target[key] = overrides[key];
        }
      }
    }
    return target;
  };

  // NOTE(review): nothing in this file assigns concrete.util, so the original
  // `concrete.util.generateUUID()` call could not succeed. Fall back to the
  // sibling util module; presumably it exports generateUUID() - confirm.
  const util = concrete.util || require('./util');

  const tokenTagging = new TokenTagging();
  tokenTagging.metadata = new AnnotationMetadata();
  tokenTagging.metadata.timestamp = Math.floor(Date.now() / 1000);
  tokenTagging.metadata.tool = 'concrete.js - TokenTagging.create()';
  tokenTagging.taggedTokenList = [];
  tokenTagging.taggingType = '';
  tokenTagging.uuid = util.generateUUID();

  applyOverrides(tokenTagging, options);

  // Build the final metadata in a fresh object so that a metadata object
  // supplied by the caller through `options` is never mutated.
  tokenTagging.metadata = applyOverrides(
    applyOverrides(new AnnotationMetadata(), tokenTagging.metadata),
    metadataOptions
  );
  return tokenTagging;
};
|
38
|
+
|
39
|
+
/**
 * Get BIO value for TaggedToken at tokenIndex
 *
 * The BIO value is the first character of the TaggedToken's tag.
 *
 * @param {Number} tokenIndex
 * @returns {String|null} - 'B', 'I' or 'O'; null when there is no TaggedToken
 *                          at tokenIndex, its tag is empty, or the tag does
 *                          not start with one of those three characters
 */
TokenTagging.prototype.bioGetBIOValue = function(tokenIndex) {
  const taggedToken = this.getTaggedTokenWithTokenIndex(tokenIndex);
  if (!taggedToken || !taggedToken.tag) {
    return null;
  }

  const leadingChar = taggedToken.tag.charAt(0);
  switch (leadingChar) {
    case 'B':
    case 'I':
    case 'O':
      return leadingChar;
    default:
      return null;
  }
};
|
55
|
+
|
56
|
+
/**
 * Get tag value (stripped of BIO tag and separator) for TaggedToken at tokenIndex
 *
 * BIO tags are built as <B|I> + separator + tag value, so the leading BIO
 * character and the configured separator are removed. When no separator has
 * been configured, the default single-character separator '-' is assumed.
 * This function does not modify the TokenTagging.
 *
 * @param {Number} tokenIndex
 * @returns {String|null} - The tag value (the empty string for a bare tag
 *                          such as 'O'), or null when there is no TaggedToken
 *                          at tokenIndex or its tag is empty
 */
TokenTagging.prototype.bioGetTagValue = function(tokenIndex) {
  const taggedToken = this.getTaggedTokenWithTokenIndex(tokenIndex);
  if (!taggedToken || !taggedToken.tag) {
    return null;
  }

  // Read the property directly rather than calling bioGetTagSeparator(),
  // which would store the default separator on this object as a side effect.
  const separator = this.bioTagSeparator === undefined ? '-' : String(this.bioTagSeparator);

  // Skip the one-character BIO prefix plus the whole separator; a fixed
  // offset of 2 is only correct for one-character separators.
  return taggedToken.tag.substring(1 + separator.length);
};
|
70
|
+
|
71
|
+
/**
 * Returns separator character for BIO TokenTaggings.
 *
 * As a side effect, when no separator has been stored on this TokenTagging
 * yet (the property is undefined), the default '-' is stored before it is
 * returned.
 *
 * @returns {String} - Separator character for BIO TokenTaggings
 */
TokenTagging.prototype.bioGetTagSeparator = function() {
  const separatorIsUnset = this.bioTagSeparator === undefined;
  if (separatorIsUnset) {
    this.bioTagSeparator = '-';
  }
  return this.bioTagSeparator;
};
|
85
|
+
|
86
|
+
/**
 * Returns token index of 'B' tag for the (possibly multi-token) 'BI'
 * tagging at the specified tokenIndex.
 *
 * If the tag at tokenIndex is a 'B' tag, return tokenIndex.  If the
 * tag at tokenIndex is an 'I' tag, walk backwards over the preceding
 * 'I' tags to find the index of the 'B' tag that starts the tagging.
 *
 * @param {Number} tokenIndex - Index of a token carrying a 'B' or 'I' tag
 * @returns {Number} - Token index of "B" tag
 * @throws {TypeError} Thrown if the tag at tokenIndex is not a 'B' or
 *                     'I' tag.  Also thrown if the tag at tokenIndex
 *                     is a valid 'I' tag, but not part of a valid 'BI*'
 *                     multi-token tagging.
 */
TokenTagging.prototype.bioGetTokenIndexForB = function(tokenIndex) {
  const startValue = this.bioGetBIOValue(tokenIndex);
  if (startValue !== 'B' && startValue !== 'I') {
    throw new TypeError("TokenTagging.bioGetTokenIndexForB expected a 'B' or 'I' tag at tokenIndex " + tokenIndex);
  }

  // Step back over the run of 'I' tags; the loop stops on the first token
  // that is not 'I', including a token that does not exist.
  let bTokenIndex = tokenIndex;
  while (this.bioGetBIOValue(bTokenIndex) === 'I') {
    bTokenIndex -= 1;
  }

  if (this.bioGetBIOValue(bTokenIndex) !== 'B') {
    // Report the tag string itself; concatenating the TaggedToken object
    // would only print "[object Object]".
    const taggedToken = this.getTaggedTokenWithTokenIndex(bTokenIndex);
    const foundTag = taggedToken ? "'" + taggedToken.tag + "'" : 'missing';
    throw new TypeError("TokenTagging.bioGetTokenIndexForB expected a 'B' tag at tokenIndex " +
                        bTokenIndex + ', but tag was ' + foundTag);
  }
  return bTokenIndex;
};
|
117
|
+
|
118
|
+
/**
 * Set BIO TaggedToken tag
 *
 * After the tag at tokenIndex is written, the token at tokenIndex+1 is
 * repaired if it carries an 'I' tag: following an 'O' it is turned into a
 * 'B' tag (keeping its own tag text), otherwise its tag text is updated to
 * match this token's, and the repair continues along the run of 'I' tags.
 *
 * @param {String} bioValue - Should be 'B', 'I' or 'O'
 * @param {String} tagText - Tag text for a 'B' tag.  Ignored for 'I' (the
 *                           text is taken from the previous token's tag)
 *                           and for 'O' (the tag is just 'O').
 * @param {Number} tokenIndex
 * @throws {TypeError} Thrown if bioValue is not 'B'|'I'|'O'.  Also thrown
 *                     if bioValue is 'I', but not part of a valid 'BI*'
 *                     multi-token tagging.
 */
TokenTagging.prototype.bioSetTaggedTokenTag = function(bioValue, tagText, tokenIndex) {
  if (bioValue !== 'B' && bioValue !== 'I' && bioValue !== 'O') {
    throw new TypeError("TokenTagging.bioSetTaggedTokenTag() expected bioValue to be 'B', 'I' or 'O', " +
                        "but instead it was '" + bioValue + "'");
  }

  // The tag text actually stored at tokenIndex.  For 'I' this is the previous
  // token's text, not the caller's argument, and it is the value that any
  // following 'I' tokens must agree with.
  let effectiveTagText = tagText;

  if (bioValue === 'B') {
    this.setTaggedTokenTag(bioValue + this.bioGetTagSeparator() + tagText, tokenIndex);
  }
  else if (bioValue === 'I') {
    const bioPreviousValue = this.bioGetBIOValue(tokenIndex - 1);
    if (bioPreviousValue !== 'B' && bioPreviousValue !== 'I') {
      throw new TypeError("TokenTagging.bioSetTaggedTokenTag() encountered inconsistent BIO tagging " +
                          "at tokenIndex " + (tokenIndex - 1));
    }
    // Get tag value from previous tag, ignore 'tagText' passed into function
    effectiveTagText = this.bioGetTagValue(tokenIndex - 1);
    this.setTaggedTokenTag(bioValue + this.bioGetTagSeparator() + effectiveTagText, tokenIndex);
  }
  else {
    this.setTaggedTokenTag('O', tokenIndex);
  }

  // Only a following 'I' tag can have been invalidated by this change
  if (this.bioGetBIOValue(tokenIndex + 1) !== 'I') {
    return;
  }

  const tagTextNext = this.bioGetTagValue(tokenIndex + 1);
  if (bioValue === 'O') {
    // An 'I' cannot follow an 'O': promote it to the start of its own tagging
    this.bioSetTaggedTokenTag('B', tagTextNext, tokenIndex + 1);
  }
  else if (effectiveTagText !== tagTextNext) {
    // Update tagText for all following 'I' tokens
    this.bioSetTaggedTokenTag('I', effectiveTagText, tokenIndex + 1);
  }
};
|
167
|
+
|
168
|
+
/**
 * For BIO TokenTaggings, stores the separator that bioGetTagSeparator()
 * returns, i.e. the string placed between the B/I/O character and the rest
 * of the tag.
 *
 * The value is stored as given; no validation is performed.
 *
 * @param {String} separator - String used as separator character
 */
TokenTagging.prototype.bioSetTagSeparator = function(separator) {
  this.bioTagSeparator = separator;
};
|
177
|
+
|
178
|
+
/**
 * Return a deep copy of this TokenTagging's taggedTokenList.
 *
 * Every element is copied into a new TaggedToken; nested arrays and objects
 * are copied recursively, so changing the copy never affects the original.
 *
 * @returns {TaggedToken[]} A new array of new TaggedToken objects (empty when
 *                          this TokenTagging has no taggedTokenList)
 */
TokenTagging.prototype.deepCopyTaggedTokenList = function() {
  // Recursive copy of arrays and objects (own enumerable properties only,
  // prototype preserved); primitives are returned unchanged.  This replaces
  // the `jQuery.extend` stub, which unconditionally throws.
  // NOTE(review): assumes the data holds no cyclic references - confirm.
  const deepClone = function(value) {
    if (Array.isArray(value)) {
      return value.map(deepClone);
    }
    if (value !== null && typeof value === 'object') {
      const clone = Object.create(Object.getPrototypeOf(value));
      for (const key of Object.keys(value)) {
        clone[key] = deepClone(value[key]);
      }
      return clone;
    }
    return value;
  };

  return (this.taggedTokenList || []).map(function(original) {
    const taggedToken = new TaggedToken();
    for (const key of Object.keys(original)) {
      taggedToken[key] = deepClone(original[key]);
    }
    return taggedToken;
  });
};
|
191
|
+
|
192
|
+
/**
 * Look up the TaggedToken whose tokenIndex field strictly equals tokenIndex.
 *
 * taggedTokenList is scanned front to back and the first match is returned.
 *
 * @param {Number} tokenIndex
 * @returns {TaggedToken|null} The matching TaggedToken, or null if none matches
 */
TokenTagging.prototype.getTaggedTokenWithTokenIndex = function(tokenIndex) {
  for (const candidate of this.taggedTokenList) {
    if (candidate.tokenIndex === tokenIndex) {
      return candidate;
    }
  }
  return null;
};
|
206
|
+
|
207
|
+
/**
 * Replace taggedTokenList with one TaggedToken per token of the given
 * Tokenization, all carrying the same tag.
 *
 * The existing taggedTokenList is discarded first.  The new TaggedTokens are
 * numbered 0..n-1, where n is the length of tokenization.tokenList.tokenList.
 *
 * @param {Tokenization} tokenization - Used to determine # of TokenTags
 * @param {String} tagText - Value for each TaggedToken's "tag" field
 */
TokenTagging.prototype.setAllTaggedTokenTags = function(tokenization, tagText) {
  const freshList = [];
  // Install the empty list before reading the tokenization, as the original
  // does, so the old contents are dropped even if the read below throws.
  this.taggedTokenList = freshList;

  let position = 0;
  while (position < tokenization.tokenList.tokenList.length) {
    const entry = new TaggedToken();
    entry.tag = tagText;
    entry.tokenIndex = position;
    freshList.push(entry);
    position += 1;
  }
};
|
224
|
+
|
225
|
+
/**
 * Sets the tag of the TaggedToken with the specified tokenIndex.
 * If a TaggedToken with the specified tokenIndex does not exist,
 * then it will be created and appended to taggedTokenList.
 *
 * @param {String} tagText - New value for the TaggedToken's "tag" field
 * @param {Number} tokenIndex
 */
TokenTagging.prototype.setTaggedTokenTag = function(tagText, tokenIndex) {
  const existing = this.getTaggedTokenWithTokenIndex(tokenIndex);
  if (existing) {
    existing.tag = tagText;
    return;
  }

  const created = new TaggedToken();
  created.tokenIndex = tokenIndex;
  this.taggedTokenList.push(created);
  created.tag = tagText;
};
|