@elanlanguages/bridge-anonymization 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -1
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -1
- package/dist/crypto/pii-map-crypto.js +8 -8
- package/dist/crypto/pii-map-crypto.js.map +1 -1
- package/dist/index.d.ts +25 -20
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +103 -52
- package/dist/index.js.map +1 -1
- package/dist/ner/model-manager.d.ts.map +1 -1
- package/dist/ner/model-manager.js +10 -8
- package/dist/ner/model-manager.js.map +1 -1
- package/dist/ner/ner-model.d.ts.map +1 -1
- package/dist/ner/ner-model.js +10 -10
- package/dist/ner/ner-model.js.map +1 -1
- package/dist/ner/onnx-runtime.d.ts +3 -3
- package/dist/ner/onnx-runtime.d.ts.map +1 -1
- package/dist/ner/onnx-runtime.js +1 -1
- package/dist/ner/onnx-runtime.js.map +1 -1
- package/dist/ner/tokenizer.d.ts +26 -53
- package/dist/ner/tokenizer.d.ts.map +1 -1
- package/dist/ner/tokenizer.js +174 -196
- package/dist/ner/tokenizer.js.map +1 -1
- package/dist/pipeline/index.d.ts +7 -4
- package/dist/pipeline/index.d.ts.map +1 -1
- package/dist/pipeline/index.js +7 -4
- package/dist/pipeline/index.js.map +1 -1
- package/dist/pipeline/resolver.d.ts.map +1 -1
- package/dist/pipeline/resolver.js +3 -2
- package/dist/pipeline/resolver.js.map +1 -1
- package/dist/pipeline/semantic-data-loader.d.ts +157 -0
- package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
- package/dist/pipeline/semantic-data-loader.js +662 -0
- package/dist/pipeline/semantic-data-loader.js.map +1 -0
- package/dist/pipeline/semantic-enricher.d.ts +102 -0
- package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
- package/dist/pipeline/semantic-enricher.js +268 -0
- package/dist/pipeline/semantic-enricher.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +52 -12
- package/dist/pipeline/tagger.d.ts.map +1 -1
- package/dist/pipeline/tagger.js +226 -21
- package/dist/pipeline/tagger.js.map +1 -1
- package/dist/pipeline/title-extractor.d.ts +79 -0
- package/dist/pipeline/title-extractor.d.ts.map +1 -0
- package/dist/pipeline/title-extractor.js +801 -0
- package/dist/pipeline/title-extractor.js.map +1 -0
- package/dist/types/index.d.ts +66 -3
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +14 -3
- package/dist/types/index.js.map +1 -1
- package/dist/utils/index.d.ts +3 -3
- package/dist/utils/index.js +3 -3
- package/package.json +7 -5
package/dist/ner/tokenizer.js
CHANGED
|
@@ -1,33 +1,33 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
3
|
-
*
|
|
4
|
-
*
|
|
2
|
+
* HuggingFace Tokenizer
|
|
3
|
+
* Loads and uses tokenizers from HuggingFace's tokenizer.json format
|
|
4
|
+
* Supports Unigram (SentencePiece) and BPE tokenizers
|
|
5
5
|
*/
|
|
6
6
|
/**
|
|
7
|
-
* Default tokenizer configuration
|
|
7
|
+
* Default tokenizer configuration
|
|
8
8
|
*/
|
|
9
9
|
export const DEFAULT_TOKENIZER_CONFIG = {
|
|
10
10
|
maxLength: 512,
|
|
11
|
-
|
|
12
|
-
clsToken: '[CLS]',
|
|
13
|
-
sepToken: '[SEP]',
|
|
14
|
-
padToken: '[PAD]',
|
|
15
|
-
maskToken: '[MASK]',
|
|
16
|
-
doLowerCase: true,
|
|
17
|
-
stripAccents: true,
|
|
11
|
+
doLowerCase: false, // XLM-RoBERTa doesn't lowercase
|
|
18
12
|
};
|
|
19
13
|
/**
|
|
20
|
-
* WordPiece Tokenizer
|
|
14
|
+
* WordPiece Tokenizer - supports both HuggingFace JSON and vocab.txt formats
|
|
21
15
|
*/
|
|
22
16
|
export class WordPieceTokenizer {
|
|
23
17
|
vocab;
|
|
24
18
|
inverseVocab;
|
|
25
19
|
config;
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
clsId;
|
|
29
|
-
sepId;
|
|
30
|
-
padId;
|
|
20
|
+
sortedVocab;
|
|
21
|
+
// Special token IDs (XLM-RoBERTa style)
|
|
22
|
+
clsId = 0; // <s>
|
|
23
|
+
sepId = 2; // </s>
|
|
24
|
+
padId = 1; // <pad>
|
|
25
|
+
unkId = 3; // <unk>
|
|
26
|
+
// Special token strings
|
|
27
|
+
clsToken = '<s>';
|
|
28
|
+
sepToken = '</s>';
|
|
29
|
+
padToken = '<pad>';
|
|
30
|
+
unkToken = '<unk>';
|
|
31
31
|
constructor(vocab, config = {}) {
|
|
32
32
|
this.vocab = vocab;
|
|
33
33
|
this.config = { ...DEFAULT_TOKENIZER_CONFIG, ...config };
|
|
@@ -36,11 +36,37 @@ export class WordPieceTokenizer {
|
|
|
36
36
|
for (const [token, id] of vocab) {
|
|
37
37
|
this.inverseVocab.set(id, token);
|
|
38
38
|
}
|
|
39
|
-
//
|
|
40
|
-
this.
|
|
41
|
-
|
|
42
|
-
this.
|
|
43
|
-
|
|
39
|
+
// Sort vocab by token length (longest first) for greedy matching
|
|
40
|
+
this.sortedVocab = Array.from(vocab.entries()).sort((a, b) => b[0].length - a[0].length);
|
|
41
|
+
// Try to detect special tokens from vocab
|
|
42
|
+
this.detectSpecialTokens();
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Detect special tokens from vocabulary
|
|
46
|
+
*/
|
|
47
|
+
detectSpecialTokens() {
|
|
48
|
+
// XLM-RoBERTa style
|
|
49
|
+
if (this.vocab.has('<s>')) {
|
|
50
|
+
this.clsToken = '<s>';
|
|
51
|
+
this.clsId = this.vocab.get('<s>') ?? 0;
|
|
52
|
+
this.sepToken = '</s>';
|
|
53
|
+
this.sepId = this.vocab.get('</s>') ?? 2;
|
|
54
|
+
this.padToken = '<pad>';
|
|
55
|
+
this.padId = this.vocab.get('<pad>') ?? 1;
|
|
56
|
+
this.unkToken = '<unk>';
|
|
57
|
+
this.unkId = this.vocab.get('<unk>') ?? 3;
|
|
58
|
+
}
|
|
59
|
+
// BERT style
|
|
60
|
+
else if (this.vocab.has('[CLS]')) {
|
|
61
|
+
this.clsToken = '[CLS]';
|
|
62
|
+
this.clsId = this.vocab.get('[CLS]') ?? 101;
|
|
63
|
+
this.sepToken = '[SEP]';
|
|
64
|
+
this.sepId = this.vocab.get('[SEP]') ?? 102;
|
|
65
|
+
this.padToken = '[PAD]';
|
|
66
|
+
this.padId = this.vocab.get('[PAD]') ?? 0;
|
|
67
|
+
this.unkToken = '[UNK]';
|
|
68
|
+
this.unkId = this.vocab.get('[UNK]') ?? 100;
|
|
69
|
+
}
|
|
44
70
|
}
|
|
45
71
|
/**
|
|
46
72
|
* Tokenizes text into tokens with offset tracking
|
|
@@ -48,10 +74,10 @@ export class WordPieceTokenizer {
|
|
|
48
74
|
tokenize(text) {
|
|
49
75
|
const tokens = [];
|
|
50
76
|
const tokenToCharSpan = [];
|
|
51
|
-
// Add
|
|
77
|
+
// Add CLS token
|
|
52
78
|
tokens.push({
|
|
53
79
|
id: this.clsId,
|
|
54
|
-
token: this.
|
|
80
|
+
token: this.clsToken,
|
|
55
81
|
start: 0,
|
|
56
82
|
end: 0,
|
|
57
83
|
isContinuation: false,
|
|
@@ -59,21 +85,33 @@ export class WordPieceTokenizer {
|
|
|
59
85
|
});
|
|
60
86
|
tokenToCharSpan.push(null);
|
|
61
87
|
// Preprocess text
|
|
62
|
-
const processedText = this.
|
|
63
|
-
//
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
tokenToCharSpan.push([t.start, t.end]);
|
|
88
|
+
const processedText = this.config.doLowerCase ? text.toLowerCase() : text;
|
|
89
|
+
// Tokenize using greedy longest-match
|
|
90
|
+
let pos = 0;
|
|
91
|
+
while (pos < processedText.length) {
|
|
92
|
+
// Skip whitespace
|
|
93
|
+
if (/\s/.test(processedText[pos])) {
|
|
94
|
+
pos++;
|
|
95
|
+
continue;
|
|
71
96
|
}
|
|
97
|
+
// Find the longest matching token starting at this position
|
|
98
|
+
const { token, id, length } = this.findBestToken(processedText, pos);
|
|
99
|
+
const isFirstOfWord = pos === 0 || /\s/.test(processedText[pos - 1]);
|
|
100
|
+
tokens.push({
|
|
101
|
+
id,
|
|
102
|
+
token,
|
|
103
|
+
start: pos,
|
|
104
|
+
end: pos + length,
|
|
105
|
+
isContinuation: !isFirstOfWord && !token.startsWith('▁'),
|
|
106
|
+
isSpecial: false,
|
|
107
|
+
});
|
|
108
|
+
tokenToCharSpan.push([pos, pos + length]);
|
|
109
|
+
pos += length;
|
|
72
110
|
}
|
|
73
|
-
// Add
|
|
111
|
+
// Add SEP token
|
|
74
112
|
tokens.push({
|
|
75
113
|
id: this.sepId,
|
|
76
|
-
token: this.
|
|
114
|
+
token: this.sepToken,
|
|
77
115
|
start: text.length,
|
|
78
116
|
end: text.length,
|
|
79
117
|
isContinuation: false,
|
|
@@ -85,10 +123,9 @@ export class WordPieceTokenizer {
|
|
|
85
123
|
if (tokens.length > maxTokens) {
|
|
86
124
|
tokens.length = maxTokens - 1;
|
|
87
125
|
tokenToCharSpan.length = maxTokens - 1;
|
|
88
|
-
// Add [SEP] at end
|
|
89
126
|
tokens.push({
|
|
90
127
|
id: this.sepId,
|
|
91
|
-
token: this.
|
|
128
|
+
token: this.sepToken,
|
|
92
129
|
start: text.length,
|
|
93
130
|
end: text.length,
|
|
94
131
|
isContinuation: false,
|
|
@@ -109,161 +146,66 @@ export class WordPieceTokenizer {
|
|
|
109
146
|
};
|
|
110
147
|
}
|
|
111
148
|
/**
|
|
112
|
-
*
|
|
113
|
-
*/
|
|
114
|
-
preprocess(text) {
|
|
115
|
-
let processed = text;
|
|
116
|
-
if (this.config.doLowerCase) {
|
|
117
|
-
processed = processed.toLowerCase();
|
|
118
|
-
}
|
|
119
|
-
if (this.config.stripAccents) {
|
|
120
|
-
processed = this.stripAccents(processed);
|
|
121
|
-
}
|
|
122
|
-
return processed;
|
|
123
|
-
}
|
|
124
|
-
/**
|
|
125
|
-
* Strips accents from text
|
|
126
|
-
*/
|
|
127
|
-
stripAccents(text) {
|
|
128
|
-
return text.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
|
|
129
|
-
}
|
|
130
|
-
/**
|
|
131
|
-
* Splits text into words while tracking character offsets
|
|
132
|
-
*/
|
|
133
|
-
splitIntoWords(processedText, originalText) {
|
|
134
|
-
const words = [];
|
|
135
|
-
// Split on whitespace and punctuation while keeping track of positions
|
|
136
|
-
const wordPattern = /\S+/g;
|
|
137
|
-
let match;
|
|
138
|
-
while ((match = wordPattern.exec(processedText)) !== null) {
|
|
139
|
-
// Find corresponding position in original text
|
|
140
|
-
// Since we may have lowercased, we need to map positions
|
|
141
|
-
const start = match.index;
|
|
142
|
-
const end = start + match[0].length;
|
|
143
|
-
words.push({
|
|
144
|
-
word: match[0],
|
|
145
|
-
start,
|
|
146
|
-
end,
|
|
147
|
-
});
|
|
148
|
-
}
|
|
149
|
-
return words;
|
|
150
|
-
}
|
|
151
|
-
/**
|
|
152
|
-
* Tokenizes a single word using WordPiece algorithm
|
|
153
|
-
*/
|
|
154
|
-
tokenizeWord(word, startOffset, endOffset) {
|
|
155
|
-
const tokens = [];
|
|
156
|
-
// Handle punctuation separately
|
|
157
|
-
const subwords = this.splitWordIntoPieces(word);
|
|
158
|
-
let currentOffset = startOffset;
|
|
159
|
-
for (let i = 0; i < subwords.length; i++) {
|
|
160
|
-
let subword = subwords[i];
|
|
161
|
-
const isContinuation = i > 0;
|
|
162
|
-
// For continuation tokens, add ## prefix for vocab lookup
|
|
163
|
-
const vocabKey = isContinuation ? '##' + subword : subword;
|
|
164
|
-
// Look up in vocabulary
|
|
165
|
-
let tokenId = this.vocab.get(vocabKey);
|
|
166
|
-
// If not found, try to find longest matching prefix
|
|
167
|
-
if (tokenId === undefined) {
|
|
168
|
-
const { id, token } = this.findLongestMatch(subword, isContinuation);
|
|
169
|
-
tokenId = id;
|
|
170
|
-
subword = token;
|
|
171
|
-
}
|
|
172
|
-
const tokenLength = subword.length;
|
|
173
|
-
const tokenEnd = Math.min(currentOffset + tokenLength, endOffset);
|
|
174
|
-
tokens.push({
|
|
175
|
-
id: tokenId,
|
|
176
|
-
token: isContinuation ? '##' + subword : subword,
|
|
177
|
-
start: currentOffset,
|
|
178
|
-
end: tokenEnd,
|
|
179
|
-
isContinuation,
|
|
180
|
-
isSpecial: false,
|
|
181
|
-
});
|
|
182
|
-
currentOffset = tokenEnd;
|
|
183
|
-
}
|
|
184
|
-
return tokens;
|
|
185
|
-
}
|
|
186
|
-
/**
|
|
187
|
-
* Splits a word into pieces, handling punctuation
|
|
149
|
+
* Find the best matching token using greedy longest-match
|
|
188
150
|
*/
|
|
189
|
-
|
|
190
|
-
const
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
151
|
+
findBestToken(text, startPos) {
|
|
152
|
+
const remaining = text.slice(startPos);
|
|
153
|
+
// Check if this starts a new word (preceded by space or start)
|
|
154
|
+
const isWordStart = startPos === 0 || /\s/.test(text[startPos - 1]);
|
|
155
|
+
// For SentencePiece models, word-initial tokens start with ▁
|
|
156
|
+
if (isWordStart) {
|
|
157
|
+
// Try with ▁ prefix first
|
|
158
|
+
const withPrefix = '▁' + remaining;
|
|
159
|
+
for (const [vocabToken, id] of this.sortedVocab) {
|
|
160
|
+
if (withPrefix.startsWith(vocabToken)) {
|
|
161
|
+
// Return the match length without the ▁ since that's not in original text
|
|
162
|
+
return {
|
|
163
|
+
token: vocabToken,
|
|
164
|
+
id,
|
|
165
|
+
length: vocabToken.length - 1 // Subtract 1 for the ▁
|
|
166
|
+
};
|
|
197
167
|
}
|
|
198
|
-
pieces.push(char);
|
|
199
168
|
}
|
|
200
|
-
else {
|
|
201
|
-
current += char;
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
if (current.length > 0) {
|
|
205
|
-
pieces.push(current);
|
|
206
169
|
}
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
/[\u2000-\u206F]/.test(char) || // General punctuation
|
|
220
|
-
/[\u3000-\u303F]/.test(char) // CJK punctuation
|
|
221
|
-
);
|
|
222
|
-
}
|
|
223
|
-
/**
|
|
224
|
-
* Finds the longest matching token in vocabulary
|
|
225
|
-
*/
|
|
226
|
-
findLongestMatch(word, isContinuation) {
|
|
227
|
-
const prefix = isContinuation ? '##' : '';
|
|
228
|
-
// Try progressively shorter substrings
|
|
229
|
-
for (let end = word.length; end > 0; end--) {
|
|
230
|
-
const subword = word.slice(0, end);
|
|
231
|
-
const vocabKey = prefix + subword;
|
|
232
|
-
const id = this.vocab.get(vocabKey);
|
|
233
|
-
if (id !== undefined) {
|
|
234
|
-
return { id, token: subword };
|
|
170
|
+
// Try exact match without prefix
|
|
171
|
+
for (const [vocabToken, id] of this.sortedVocab) {
|
|
172
|
+
// Skip special tokens and tokens starting with ▁ for non-word-start positions
|
|
173
|
+
if (vocabToken.startsWith('<') || vocabToken.startsWith('['))
|
|
174
|
+
continue;
|
|
175
|
+
if (!isWordStart && vocabToken.startsWith('▁'))
|
|
176
|
+
continue;
|
|
177
|
+
if (remaining.startsWith(vocabToken.replace(/^▁/, ''))) {
|
|
178
|
+
const matchLength = vocabToken.replace(/^▁/, '').length;
|
|
179
|
+
if (matchLength > 0) {
|
|
180
|
+
return { token: vocabToken, id, length: matchLength };
|
|
181
|
+
}
|
|
235
182
|
}
|
|
236
183
|
}
|
|
237
|
-
//
|
|
238
|
-
|
|
184
|
+
// Single character fallback
|
|
185
|
+
const char = remaining[0];
|
|
186
|
+
const charId = this.vocab.get(char) ?? this.vocab.get('▁' + char) ?? this.unkId;
|
|
187
|
+
return { token: char, id: charId, length: 1 };
|
|
239
188
|
}
|
|
240
189
|
/**
|
|
241
190
|
* Decodes token IDs back to text
|
|
242
191
|
*/
|
|
243
192
|
decode(tokenIds) {
|
|
244
|
-
const
|
|
193
|
+
const parts = [];
|
|
245
194
|
for (const id of tokenIds) {
|
|
246
195
|
const token = this.inverseVocab.get(id);
|
|
247
196
|
if (token === undefined)
|
|
248
197
|
continue;
|
|
249
|
-
|
|
250
|
-
if (token === this.config.clsToken ||
|
|
251
|
-
token === this.config.sepToken ||
|
|
252
|
-
token === this.config.padToken) {
|
|
198
|
+
if (token === this.clsToken || token === this.sepToken || token === this.padToken)
|
|
253
199
|
continue;
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
tokens.push(token.slice(2));
|
|
200
|
+
// SentencePiece uses ▁ to mark word boundaries
|
|
201
|
+
if (token.startsWith('▁')) {
|
|
202
|
+
parts.push(' ' + token.slice(1));
|
|
258
203
|
}
|
|
259
204
|
else {
|
|
260
|
-
|
|
261
|
-
tokens.push(' ');
|
|
262
|
-
}
|
|
263
|
-
tokens.push(token);
|
|
205
|
+
parts.push(token);
|
|
264
206
|
}
|
|
265
207
|
}
|
|
266
|
-
return
|
|
208
|
+
return parts.join('').trim();
|
|
267
209
|
}
|
|
268
210
|
/**
|
|
269
211
|
* Gets vocabulary size
|
|
@@ -285,15 +227,58 @@ export class WordPieceTokenizer {
|
|
|
285
227
|
}
|
|
286
228
|
}
|
|
287
229
|
/**
|
|
288
|
-
* Loads vocabulary from a
|
|
230
|
+
* Loads vocabulary from a file (supports tokenizer.json and vocab.txt)
|
|
289
231
|
*/
|
|
290
|
-
export async function loadVocabFromFile(
|
|
232
|
+
export async function loadVocabFromFile(filePath) {
|
|
291
233
|
const fs = await import('fs/promises');
|
|
292
|
-
const content = await fs.readFile(
|
|
293
|
-
|
|
234
|
+
const content = await fs.readFile(filePath, 'utf-8');
|
|
235
|
+
// Detect format
|
|
236
|
+
if (filePath.endsWith('.json') || content.trim().startsWith('{')) {
|
|
237
|
+
return parseHFTokenizerJson(content);
|
|
238
|
+
}
|
|
239
|
+
else {
|
|
240
|
+
return parseVocab(content);
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
/**
|
|
244
|
+
* Parses HuggingFace tokenizer.json format
|
|
245
|
+
*/
|
|
246
|
+
export function parseHFTokenizerJson(content) {
|
|
247
|
+
const vocab = new Map();
|
|
248
|
+
try {
|
|
249
|
+
const config = JSON.parse(content);
|
|
250
|
+
// Add special tokens first
|
|
251
|
+
if (Array.isArray(config.added_tokens)) {
|
|
252
|
+
for (const token of config.added_tokens) {
|
|
253
|
+
vocab.set(token.content, token.id);
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
// Add model vocabulary
|
|
257
|
+
if (config.model !== undefined && config.model.vocab !== undefined) {
|
|
258
|
+
if (Array.isArray(config.model.vocab)) {
|
|
259
|
+
// Unigram format: array of [token, score] pairs
|
|
260
|
+
for (let i = 0; i < config.model.vocab.length; i++) {
|
|
261
|
+
const entry = config.model.vocab[i];
|
|
262
|
+
if (entry && typeof entry[0] === 'string') {
|
|
263
|
+
vocab.set(entry[0], i);
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
else {
|
|
268
|
+
// BPE/WordPiece format: object mapping token -> id
|
|
269
|
+
for (const [token, id] of Object.entries(config.model.vocab)) {
|
|
270
|
+
vocab.set(token, id);
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
catch (e) {
|
|
276
|
+
throw new Error(`Failed to parse tokenizer.json: ${String(e)}`);
|
|
277
|
+
}
|
|
278
|
+
return vocab;
|
|
294
279
|
}
|
|
295
280
|
/**
|
|
296
|
-
* Parses vocabulary from string content
|
|
281
|
+
* Parses vocabulary from string content (vocab.txt format)
|
|
297
282
|
*/
|
|
298
283
|
export function parseVocab(content) {
|
|
299
284
|
const vocab = new Map();
|
|
@@ -311,26 +296,19 @@ export function parseVocab(content) {
|
|
|
311
296
|
*/
|
|
312
297
|
export function createTestVocab() {
|
|
313
298
|
const tokens = [
|
|
314
|
-
'
|
|
315
|
-
'
|
|
316
|
-
'
|
|
317
|
-
'
|
|
318
|
-
'
|
|
319
|
-
'
|
|
320
|
-
'
|
|
321
|
-
'
|
|
322
|
-
'
|
|
323
|
-
'
|
|
324
|
-
'
|
|
325
|
-
'
|
|
326
|
-
'germany',
|
|
327
|
-
'##s',
|
|
328
|
-
'##ed',
|
|
329
|
-
'##ing',
|
|
330
|
-
',',
|
|
331
|
-
'.',
|
|
299
|
+
'<s>',
|
|
300
|
+
'<pad>',
|
|
301
|
+
'</s>',
|
|
302
|
+
'<unk>',
|
|
303
|
+
'▁Hello',
|
|
304
|
+
'▁John',
|
|
305
|
+
'▁Smith',
|
|
306
|
+
'▁from',
|
|
307
|
+
'▁Acme',
|
|
308
|
+
'▁Corp',
|
|
309
|
+
'▁in',
|
|
310
|
+
'▁Berlin',
|
|
332
311
|
'!',
|
|
333
|
-
'?',
|
|
334
312
|
];
|
|
335
313
|
const vocab = new Map();
|
|
336
314
|
tokens.forEach((token, index) => {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/ner/tokenizer.ts"],"names":[],"mappings":"AAAA;;;;GAIG;
|
|
1
|
+
{"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../../src/ner/tokenizer.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAkEH;;GAEG;AACH,MAAM,CAAC,MAAM,wBAAwB,GAAoB;IACvD,SAAS,EAAE,GAAG;IACd,WAAW,EAAE,KAAK,EAAE,gCAAgC;CACrD,CAAC;AAEF;;GAEG;AACH,MAAM,OAAO,kBAAkB;IACrB,KAAK,CAAsB;IAC3B,YAAY,CAAsB;IAClC,MAAM,CAAkB;IACxB,WAAW,CAA0B;IAE7C,wCAAwC;IAChC,KAAK,GAAW,CAAC,CAAC,CAAE,MAAM;IAC1B,KAAK,GAAW,CAAC,CAAC,CAAE,OAAO;IAC3B,KAAK,GAAW,CAAC,CAAC,CAAE,QAAQ;IAC5B,KAAK,GAAW,CAAC,CAAC,CAAE,QAAQ;IAEpC,wBAAwB;IAChB,QAAQ,GAAW,KAAK,CAAC;IACzB,QAAQ,GAAW,MAAM,CAAC;IAC1B,QAAQ,GAAW,OAAO,CAAC;IAC3B,QAAQ,GAAW,OAAO,CAAC;IAEnC,YAAY,KAA0B,EAAE,SAAmC,EAAE;QAC3E,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;QACnB,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,wBAAwB,EAAE,GAAG,MAAM,EAAE,CAAC;QAEzD,sBAAsB;QACtB,IAAI,CAAC,YAAY,GAAG,IAAI,GAAG,EAAE,CAAC;QAC9B,KAAK,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC;YAChC,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,EAAE,KAAK,CAAC,CAAC;QACnC,CAAC;QAED,iEAAiE;QACjE,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAEzF,0CAA0C;QAC1C,IAAI,CAAC,mBAAmB,EAAE,CAAC;IAC7B,CAAC;IAED;;OAEG;IACK,mBAAmB;QACzB,oBAAoB;QACpB,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YAC1B,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;YACtB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;YACxC,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC;YACvB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACzC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;YACxB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAC1C,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;YACxB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC5C,CAAC;QACD,aAAa;aACR,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;YACxB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC;YAC5C,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;YACxB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC;YAC5C,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;YACxB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAC1C,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;YACxB,IAAI,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,CAAC;QAC9C,CAAC;IACH,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,IAAY;QACnB,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,eAAe,GAAmC,EAAE,CAAC;QAE3D,gBAAgB;QAChB,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,IAAI,CAAC,KAAK;YACd,KAAK,EAAE,IAAI,CAAC,QAAQ;YACpB,KAAK,EAAE,CAAC;YACR,GAAG,EAAE,CAAC;YACN,cAAc,EAAE,KAAK;YACrB,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE3B,kBAAkB;QAClB,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QAE1E,sCAAsC;QACtC,IAAI,GAAG,GAAG,CAAC,CAAC;QACZ,OAAO,GAAG,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC;YAClC,kBAAkB;YAClB,IAAI,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,CAAE,CAAC,EAAE,CAAC;gBACnC,GAAG,EAAE,CAAC;gBACN,SAAS;YACX,CAAC;YAED,4DAA4D;YAC5D,MAAM,EAAE,KAAK,EAAE,EAAE,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC,aAAa,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC;YAErE,MAAM,aAAa,GAAG,GAAG,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAE,CAAC,CAAC;YAEtE,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE;gBACF,KAAK;gBACL,KAAK,EAAE,GAAG;gBACV,GAAG,EAAE,GAAG,GAAG,MAAM;gBACjB,cAAc,EAAE,CAAC,aAAa,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;gBACxD,SAAS,EAAE,KAAK;aACjB,CAAC,CAAC;YACH,eAAe,CAAC,IAAI,CAAC,CAAC,GAAG,EAAE,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC;YAE1C,GAAG,IAAI,MAAM,CAAC;QAChB,CAAC;QAED,gBAAgB;QAChB,MAAM,CAAC,IAAI,CAAC;YACV,EAAE,EAAE,IAAI,CAAC,KAAK;YACd,KAAK,EAAE,IAAI,CAAC,QAAQ;YACpB,KAAK,EAAE,IAAI,CAAC,MAAM;YAClB,GAAG,EAAE,IAAI,CAAC,MAAM;YAChB,cAAc,EAAE,KAAK;YACrB,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAE3B,wBAAwB;QACxB,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;QACxC,IAAI,MAAM,CAAC,MAAM,GAAG,SAAS,EAAE,CAAC;YAC9B,MAAM,CAAC,MAAM,GAAG,SAAS,GAAG,CAAC,CAAC;YAC9B,eAAe,CAAC,MAAM,GAAG,SAAS,GAAG,CAAC,CAAC;YACvC,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,IAAI,CAAC,KAAK;gBACd,KAAK,EAAE,IAAI,CAAC,QAAQ;gBACpB,KAAK,EAAE,IAAI,CAAC,MAAM;gBAClB,GAAG,EAAE,IAAI,CAAC,MAAM;gBAChB,cAAc,EAAE,KAAK;gBACrB,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YACH,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC;QAED,eAAe;QACf,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACzC,MAAM,aAAa,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;QAEzC,OAAO;YACL,MAAM;YACN,QAAQ;YACR,aAAa;YACb,YAAY;YACZ,eAAe;SAChB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,IAAY,EAAE,QAAgB;QAClD,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QAEvC,+DAA+D;QAC/D,MAAM,WAAW,GAAG,QAAQ,KAAK,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAE,CAAC,CAAC;QAErE,6DAA6D;QAC7D,IAAI,WAAW,EAAE,CAAC;YAChB,0BAA0B;YAC1B,MAAM,UAAU,GAAG,GAAG,GAAG,SAAS,CAAC;YACnC,KAAK,MAAM,CAAC,UAAU,EAAE,EAAE,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;gBAChD,IAAI,UAAU,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;oBACtC,0EAA0E;oBAC1E,OAAO;wBACL,KAAK,EAAE,UAAU;wBACjB,EAAE;wBACF,MAAM,EAAE,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,uBAAuB;qBACtD,CAAC;gBACJ,CAAC;YACH,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,KAAK,MAAM,CAAC,UAAU,EAAE,EAAE,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YAChD,8EAA8E;YAC9E,IAAI,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,SAAS;YACvE,IAAI,CAAC,WAAW,IAAI,UAAU,CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,SAAS;YAEzD,IAAI,SAAS,CAAC,UAAU,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC;gBACvD,MAAM,WAAW,GAAG,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC;gBACxD,IAAI,WAAW,GAAG,CAAC,EAAE,CAAC;oBACpB,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,EAAE,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;gBACxD,CAAC;YACH,CAAC;QACH,CAAC;QAED,4BAA4B;QAC5B,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAE,CAAC;QAC3B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,GAAG,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC;QAChF,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC;IAChD,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,QAAkB;QACvB,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,KAAK,MAAM,EAAE,IAAI,QAAQ,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACxC,IAAI,KAAK,KAAK,SAAS;gBAAE,SAAS;YAClC,IAAI,KAAK,KAAK,IAAI,CAAC,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAC,QAAQ,IAAI,KAAK,KAAK,IAAI,CAAC,QAAQ;gBAAE,SAAS;YAE5F,+CAA+C;YAC/C,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC1B,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACnC,CAAC;iBAAM,CAAC;gBACN,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACpB,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,IAAI,SAAS;QACX,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,KAAa;QACtB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,EAAU;QACjB,OAAO,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACnC,CAAC;CACF;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,QAAgB;IACtD,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;IACvC,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAErD,gBAAgB;IAChB,IAAI,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QACjE,OAAO,oBAAoB,CAAC,OAAO,CAAC,CAAC;IACvC,CAAC;SAAM,CAAC;QACN,OAAO,UAAU,CAAC,OAAO,CAAC,CAAC;IAC7B,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAAC,OAAe;IAClD,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;IAExC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAsB,CAAC;QAExD,2BAA2B;QAC3B,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC;YACvC,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;gBACxC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAED,uBAAuB;QACvB,IAAI,MAAM,CAAC,KAAK,KAAK,SAAS,IAAI,MAAM,CAAC,KAAK,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;YACnE,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtC,gDAAgD;gBAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;oBACpC,IAAI,KAAK,IAAI,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,QAAQ,EAAE,CAAC;wBAC1C,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;oBACzB,CAAC;gBACH,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,mDAAmD;gBACnD,KAAK,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;oBAC7D,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;gBACvB,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,mCAAmC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAClE,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,OAAe;IACxC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;IACxC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAElC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC/B,IAAI,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5C,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe;IAC7B,MAAM,MAAM,GAAG;QACb,KAAK;QACL,OAAO;QACP,MAAM;QACN,OAAO;QACP,QAAQ;QACR,OAAO;QACP,QAAQ;QACR,OAAO;QACP,OAAO;QACP,OAAO;QACP,KAAK;QACL,SAAS;QACT,GAAG;KACJ,CAAC;IAEF,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAC;IACxC,MAAM,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAC9B,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IAC1B,CAAC,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC"}
|
package/dist/pipeline/index.d.ts
CHANGED
|
@@ -2,8 +2,11 @@
|
|
|
2
2
|
* Pipeline Module
|
|
3
3
|
* Exports all pipeline components
|
|
4
4
|
*/
|
|
5
|
-
export * from
|
|
6
|
-
export * from
|
|
7
|
-
export * from
|
|
8
|
-
export * from
|
|
5
|
+
export * from "./prenormalize.js";
|
|
6
|
+
export * from "./resolver.js";
|
|
7
|
+
export * from "./tagger.js";
|
|
8
|
+
export * from "./validator.js";
|
|
9
|
+
export * from "./semantic-enricher.js";
|
|
10
|
+
export * from "./semantic-data-loader.js";
|
|
11
|
+
export * from "./title-extractor.js";
|
|
9
12
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/pipeline/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,mBAAmB,CAAC;AAClC,cAAc,eAAe,CAAC;AAC9B,cAAc,aAAa,CAAC;AAC5B,cAAc,gBAAgB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/pipeline/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,mBAAmB,CAAC;AAClC,cAAc,eAAe,CAAC;AAC9B,cAAc,aAAa,CAAC;AAC5B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,wBAAwB,CAAC;AACvC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,sBAAsB,CAAC"}
|
package/dist/pipeline/index.js
CHANGED
|
@@ -2,8 +2,11 @@
|
|
|
2
2
|
* Pipeline Module
|
|
3
3
|
* Exports all pipeline components
|
|
4
4
|
*/
|
|
5
|
-
export * from
|
|
6
|
-
export * from
|
|
7
|
-
export * from
|
|
8
|
-
export * from
|
|
5
|
+
export * from "./prenormalize.js";
|
|
6
|
+
export * from "./resolver.js";
|
|
7
|
+
export * from "./tagger.js";
|
|
8
|
+
export * from "./validator.js";
|
|
9
|
+
export * from "./semantic-enricher.js";
|
|
10
|
+
export * from "./semantic-data-loader.js";
|
|
11
|
+
export * from "./title-extractor.js";
|
|
9
12
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/pipeline/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,mBAAmB,CAAC;AAClC,cAAc,eAAe,CAAC;AAC9B,cAAc,aAAa,CAAC;AAC5B,cAAc,gBAAgB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/pipeline/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,mBAAmB,CAAC;AAClC,cAAc,eAAe,CAAC;AAC9B,cAAc,aAAa,CAAC;AAC5B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,wBAAwB,CAAC;AACvC,cAAc,2BAA2B,CAAC;AAC1C,cAAc,sBAAsB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/pipeline/resolver.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAEL,SAAS,EAET,mBAAmB,EAEpB,MAAM,mBAAmB,CAAC;AAG3B;;GAEG;AACH,oBAAY,eAAe;IACzB,wCAAwC;IACxC,cAAc,mBAAmB;IACjC,uBAAuB;IACvB,WAAW,gBAAgB;IAC3B,6BAA6B;IAC7B,iBAAiB,sBAAsB;IACvC,oCAAoC;IACpC,aAAa,kBAAkB;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,8CAA8C;IAC9C,eAAe,EAAE,eAAe,CAAC;IACjC,mDAAmD;IACnD,aAAa,EAAE,OAAO,CAAC;IACvB,2CAA2C;IAC3C,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,eAAO,MAAM,uBAAuB,EAAE,cAIrC,CAAC;AAEF;;GAEG;AACH,wBAAgB,eAAe,CAC7B,YAAY,EAAE,SAAS,EAAE,EACzB,UAAU,EAAE,SAAS,EAAE,EACvB,MAAM,EAAE,mBAAmB,EAC3B,YAAY,EAAE,MAAM,EACpB,MAAM,GAAE,OAAO,CAAC,cAAc,CAAM,GACnC,SAAS,EAAE,CAyBb;
|
|
1
|
+
{"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/pipeline/resolver.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAEL,SAAS,EAET,mBAAmB,EAEpB,MAAM,mBAAmB,CAAC;AAG3B;;GAEG;AACH,oBAAY,eAAe;IACzB,wCAAwC;IACxC,cAAc,mBAAmB;IACjC,uBAAuB;IACvB,WAAW,gBAAgB;IAC3B,6BAA6B;IAC7B,iBAAiB,sBAAsB;IACvC,oCAAoC;IACpC,aAAa,kBAAkB;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,8CAA8C;IAC9C,eAAe,EAAE,eAAe,CAAC;IACjC,mDAAmD;IACnD,aAAa,EAAE,OAAO,CAAC;IACvB,2CAA2C;IAC3C,aAAa,EAAE,MAAM,CAAC;CACvB;AAED;;GAEG;AACH,eAAO,MAAM,uBAAuB,EAAE,cAIrC,CAAC;AAEF;;GAEG;AACH,wBAAgB,eAAe,CAC7B,YAAY,EAAE,SAAS,EAAE,EACzB,UAAU,EAAE,SAAS,EAAE,EACvB,MAAM,EAAE,mBAAmB,EAC3B,YAAY,EAAE,MAAM,EACpB,MAAM,GAAE,OAAO,CAAC,cAAc,CAAM,GACnC,SAAS,EAAE,CAyBb;AAkOD;;;GAGG;AACH,wBAAgB,oBAAoB,CAClC,YAAY,EAAE,SAAS,EAAE,GACxB,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAAC,CAEvC;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAC/B,IAAI,EAAE;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,EACpC,cAAc,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAAC,GACpD,OAAO,CAET"}
|
|
@@ -93,8 +93,9 @@ function applyDenylist(matches, policy, originalText) {
|
|
|
93
93
|
if (match.index === undefined)
|
|
94
94
|
continue;
|
|
95
95
|
// Check if this is already covered by existing matches
|
|
96
|
-
const
|
|
97
|
-
|
|
96
|
+
const matchIndex = match.index;
|
|
97
|
+
const alreadyCovered = matches.some((existing) => existing.start <= matchIndex &&
|
|
98
|
+
existing.end >= matchIndex + match[0].length);
|
|
98
99
|
if (!alreadyCovered) {
|
|
99
100
|
denylistMatches.push({
|
|
100
101
|
type: PIIType.EMAIL, // Default type for denylist; could be configurable
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolver.js","sourceRoot":"","sources":["../../src/pipeline/resolver.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,EAEP,eAAe,EAEf,qBAAqB,GACtB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAEpF;;GAEG;AACH,MAAM,CAAN,IAAY,eASX;AATD,WAAY,eAAe;IACzB,wCAAwC;IACxC,oDAAiC,CAAA;IACjC,uBAAuB;IACvB,8CAA2B,CAAA;IAC3B,6BAA6B;IAC7B,0DAAuC,CAAA;IACvC,oCAAoC;IACpC,kDAA+B,CAAA;AACjC,CAAC,EATW,eAAe,KAAf,eAAe,QAS1B;AAcD;;GAEG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAmB;IACrD,eAAe,EAAE,eAAe,CAAC,cAAc;IAC/C,aAAa,EAAE,IAAI;IACnB,aAAa,EAAE,GAAG;CACnB,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,YAAyB,EACzB,UAAuB,EACvB,MAA2B,EAC3B,YAAoB,EACpB,SAAkC,EAAE;IAEpC,MAAM,cAAc,GAAG,EAAE,GAAG,uBAAuB,EAAE,GAAG,MAAM,EAAE,CAAC;IAEjE,4DAA4D;IAC5D,MAAM,aAAa,GAAG,cAAc,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IAC3D,MAAM,WAAW,GAAG,cAAc,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;IAEvD,oCAAoC;IACpC,MAAM,sBAAsB,GAAG,cAAc,CAAC,aAAa,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;IACnF,MAAM,oBAAoB,GAAG,cAAc,CAAC,WAAW,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;IAE/E,8BAA8B;IAC9B,MAAM,UAAU,GAAG,CAAC,GAAG,sBAAsB,EAAE,GAAG,oBAAoB,CAAC,CAAC;IAExE,4CAA4C;IAC5C,MAAM,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,MAAM,EAAE,cAAc,CAAC,CAAC;IAEpE,kDAAkD;IAClD,MAAM,YAAY,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;IAEnE,8BAA8B;IAC9B,MAAM,YAAY,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAEpD,2BAA2B;IAC3B,OAAO,mBAAmB,CAAC,YAAY,CAAC,CAAC;AAC3C,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,OAAoB,EAAE,MAA2B;IACvE,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC9B,2BAA2B;QAC3B,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YACzC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,6BAA6B;QAC7B,MAAM,SAAS,GAAG,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;QACrE,IAAI,KAAK,CAAC,UAAU,GAAG,SAAS,EAAE,CAAC;YACjC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,OAAoB,EACpB,MAA2B,EAC3B,aAAqB;IAErB,IAAI,MAAM,CAAC,cAAc,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACrC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC9B,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAClD,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CACpB,OAAoB,EACpB,MAA2B,EAC3B,YAAoB;IAEpB,IAAI,MAAM,CAAC,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,eAAe,GAAgB,EAAE,CAAC;IAExC,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;QAC9C,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM;YAClC,CAAC,CAAC,OAAO;YACT,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;QAEpD,KAAK,MAAM,KAAK,IAAI,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;YACzD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;gBAAE,SAAS;YAExC,uDAAuD;YACvD,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CACjC,CAAC,QAAQ,EAAE,EAAE,CACX,QAAQ,CAAC,KAAK,IAAI,
|
|
1
|
+
{"version":3,"file":"resolver.js","sourceRoot":"","sources":["../../src/pipeline/resolver.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,EAEP,eAAe,EAEf,qBAAqB,GACtB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAEpF;;GAEG;AACH,MAAM,CAAN,IAAY,eASX;AATD,WAAY,eAAe;IACzB,wCAAwC;IACxC,oDAAiC,CAAA;IACjC,uBAAuB;IACvB,8CAA2B,CAAA;IAC3B,6BAA6B;IAC7B,0DAAuC,CAAA;IACvC,oCAAoC;IACpC,kDAA+B,CAAA;AACjC,CAAC,EATW,eAAe,KAAf,eAAe,QAS1B;AAcD;;GAEG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAmB;IACrD,eAAe,EAAE,eAAe,CAAC,cAAc;IAC/C,aAAa,EAAE,IAAI;IACnB,aAAa,EAAE,GAAG;CACnB,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,YAAyB,EACzB,UAAuB,EACvB,MAA2B,EAC3B,YAAoB,EACpB,SAAkC,EAAE;IAEpC,MAAM,cAAc,GAAG,EAAE,GAAG,uBAAuB,EAAE,GAAG,MAAM,EAAE,CAAC;IAEjE,4DAA4D;IAC5D,MAAM,aAAa,GAAG,cAAc,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IAC3D,MAAM,WAAW,GAAG,cAAc,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;IAEvD,oCAAoC;IACpC,MAAM,sBAAsB,GAAG,cAAc,CAAC,aAAa,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;IACnF,MAAM,oBAAoB,GAAG,cAAc,CAAC,WAAW,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;IAE/E,8BAA8B;IAC9B,MAAM,UAAU,GAAG,CAAC,GAAG,sBAAsB,EAAE,GAAG,oBAAoB,CAAC,CAAC;IAExE,4CAA4C;IAC5C,MAAM,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,MAAM,EAAE,cAAc,CAAC,CAAC;IAEpE,kDAAkD;IAClD,MAAM,YAAY,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;IAEnE,8BAA8B;IAC9B,MAAM,YAAY,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAEpD,2BAA2B;IAC3B,OAAO,mBAAmB,CAAC,YAAY,CAAC,CAAC;AAC3C,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,OAAoB,EAAE,MAA2B;IACvE,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC9B,2BAA2B;QAC3B,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YACzC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,6BAA6B;QAC7B,MAAM,SAAS,GAAG,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;QACrE,IAAI,KAAK,CAAC,UAAU,GAAG,SAAS,EAAE,CAAC;YACjC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,OAAoB,EACpB,MAA2B,EAC3B,aAAqB;IAErB,IAAI,MAAM,CAAC,cAAc,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACrC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC9B,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAClD,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CACpB,OAAoB,EACpB,MAA2B,EAC3B,YAAoB;IAEpB,IAAI,MAAM,CAAC,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,eAAe,GAAgB,EAAE,CAAC;IAExC,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;QAC9C,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM;YAClC,CAAC,CAAC,OAAO;YACT,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;QAEpD,KAAK,MAAM,KAAK,IAAI,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;YACzD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;gBAAE,SAAS;YAExC,uDAAuD;YACvD,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC;YAC/B,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CACjC,CAAC,QAAQ,EAAE,EAAE,CACX,QAAQ,CAAC,KAAK,IAAI,UAAU;gBAC5B,QAAQ,CAAC,GAAG,IAAI,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAC/C,CAAC;YAEF,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,eAAe,CAAC,IAAI,CAAC;oBACnB,IAAI,EAAE,OAAO,CAAC,KAAK,EAAE,mDAAmD;oBACxE,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;oBAClC,UAAU,EAAE,GAAG;oBACf,MAAM,EAAE,eAAe,CAAC,KAAK;oBAC7B,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;iBACf,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,OAAO,EAAE,GAAG,eAAe,CAAC,CAAC;AAC1C,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,OAAoB,EACpB,MAA2B,EAC3B,MAAsB;IAEtB,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACxB,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,yBAAyB;IACzB,MAAM,MAAM,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;IAC5C,MAAM,MAAM,GAAgB,EAAE,CAAC;IAE/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,qCAAqC;QACrC,MAAM,cAAc,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,YAAY,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC;QAErF,IAAI,cAAc,KAAK,CAAC,CAAC,EAAE,CAAC;YAC1B,2BAA2B;YAC3B,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;aAAM,CAAC;YACN,uBAAuB;YACvB,MAAM,QAAQ,GAAG,MAAM,CAAC,cAAc,CAAE,CAAC;YACzC,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;YAE/D,IAAI,MAAM,KAAK,KAAK,EAAE,CAAC;gBACrB,mCAAmC;gBACnC,MAAM,CAAC,cAAc,CAAC,GAAG,KAAK,CAAC;YACjC,CAAC;YACD,uCAAuC;QACzC,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,SAAS,cAAc,CACrB,CAAY,EACZ,CAAY,EACZ,MAA2B,EAC3B,MAAsB;IAEtB,+CAA+C;IAC/C,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,MAAM,KAAK,eAAe,CAAC,KAAK,IAAI,CAAC,CAAC,MAAM,KAAK,eAAe,CAAC,KAAK,EAAE,CAAC;YAC7E,OAAO,CAAC,CAAC;QACX,CAAC;QACD,IAAI,CAAC,CAAC,MAAM,KAAK,eAAe,CAAC,KAAK,IAAI,CAAC,CAAC,MAAM,KAAK,eAAe,CAAC,KAAK,EAAE,CAAC;YAC7E,OAAO,CAAC,CAAC;QACX,CAAC;IACH,CAAC;IAED,iCAAiC;IACjC,QAAQ,MAAM,CAAC,eAAe,EAAE,CAAC;QAC/B,KAAK,eAAe,CAAC,WAAW,CAAC,CAAC,CAAC;YACjC,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC3B,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;gBAClB,OAAO,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7B,CAAC;YACD,MAAM;QACR,CAAC;QAED,KAAK,eAAe,CAAC,iBAAiB,CAAC,CAAC,CAAC;YACvC,IAAI,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;gBAClC,OAAO,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,CAAC;YACD,MAAM;QACR,CAAC;QAED,KAAK,eAAe,CAAC,aAAa,CAAC,CAAC,CAAC;YACnC,MAAM,SAAS,GAAG,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAClD,MAAM,SAAS,GAAG,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAClD,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;gBAC5B,OAAO,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACvC,CAAC;YACD,MAAM;QACR,CAAC;QAED,KAAK,eAAe,CAAC,cAAc,CAAC;QACpC;YACE,wBAAwB;YACxB,MAAM;IACV,CAAC;IAED,+DAA+D;IAC/D,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAC3B,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAC3B,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;QAClC,OAAO,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;IAED,MAAM,SAAS,GAAG,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAClD,MAAM,SAAS,GAAG,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAClD,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;QAC5B,OAAO,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,mCAAmC;IACnC,OAAO,CAAC,CAAC;AACX,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,IAAa,EAAE,MAA2B;IACjE,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,GAAG,qBAAqB,CAAC,CAAC;IACvG,MAAM,KAAK,GAAG,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACzC,OAAO,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACjC,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,OAAoB;IAC5C,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,MAAM,GAAgB,EAAE,CAAC;IAE/B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;QACxD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAClC,YAAyB;IAEzB,OAAO,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;AAChE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAC/B,IAAoC,EACpC,cAAqD;IAErD,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC;AAC7E,CAAC"}
|