@malloydata/malloy-filter 0.0.237-dev250221201621
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -0
- package/SAMPLES.md +381 -0
- package/SERIALIZE_SAMPLES.md +300 -0
- package/dist/a_simple_parser.d.ts +1 -0
- package/dist/a_simple_parser.js +20 -0
- package/dist/a_simple_parser.js.map +1 -0
- package/dist/a_simple_serializer.d.ts +1 -0
- package/dist/a_simple_serializer.js +31 -0
- package/dist/a_simple_serializer.js.map +1 -0
- package/dist/base_parser.d.ts +13 -0
- package/dist/base_parser.js +33 -0
- package/dist/base_parser.js.map +1 -0
- package/dist/base_serializer.d.ts +6 -0
- package/dist/base_serializer.js +11 -0
- package/dist/base_serializer.js.map +1 -0
- package/dist/boolean_parser.d.ts +7 -0
- package/dist/boolean_parser.js +59 -0
- package/dist/boolean_parser.js.map +1 -0
- package/dist/boolean_serializer.d.ts +8 -0
- package/dist/boolean_serializer.js +31 -0
- package/dist/boolean_serializer.js.map +1 -0
- package/dist/clause_types.d.ts +70 -0
- package/dist/clause_types.js +3 -0
- package/dist/clause_types.js.map +1 -0
- package/dist/date_parser.d.ts +22 -0
- package/dist/date_parser.js +315 -0
- package/dist/date_parser.js.map +1 -0
- package/dist/date_serializer.d.ts +10 -0
- package/dist/date_serializer.js +100 -0
- package/dist/date_serializer.js.map +1 -0
- package/dist/filter_parser.d.ts +12 -0
- package/dist/filter_parser.js +66 -0
- package/dist/filter_parser.js.map +1 -0
- package/dist/filter_serializer.d.ts +13 -0
- package/dist/filter_serializer.js +43 -0
- package/dist/filter_serializer.js.map +1 -0
- package/dist/filter_types.d.ts +10 -0
- package/dist/filter_types.js +3 -0
- package/dist/filter_types.js.map +1 -0
- package/dist/generate_samples.d.ts +1 -0
- package/dist/generate_samples.js +344 -0
- package/dist/generate_samples.js.map +1 -0
- package/dist/number_parser.d.ts +20 -0
- package/dist/number_parser.js +275 -0
- package/dist/number_parser.js.map +1 -0
- package/dist/number_serializer.d.ts +11 -0
- package/dist/number_serializer.js +76 -0
- package/dist/number_serializer.js.map +1 -0
- package/dist/string_parser.d.ts +18 -0
- package/dist/string_parser.js +198 -0
- package/dist/string_parser.js.map +1 -0
- package/dist/string_serializer.d.ts +11 -0
- package/dist/string_serializer.js +77 -0
- package/dist/string_serializer.js.map +1 -0
- package/dist/token_types.d.ts +7 -0
- package/dist/token_types.js +3 -0
- package/dist/token_types.js.map +1 -0
- package/dist/tokenizer.d.ts +52 -0
- package/dist/tokenizer.js +263 -0
- package/dist/tokenizer.js.map +1 -0
- package/dist/tokenizer.spec.d.ts +1 -0
- package/dist/tokenizer.spec.js +255 -0
- package/dist/tokenizer.spec.js.map +1 -0
- package/jest.config.js +3 -0
- package/package.json +21 -0
- package/src/DEVELOPING.md +26 -0
- package/src/a_simple_parser.ts +22 -0
- package/src/a_simple_serializer.ts +40 -0
- package/src/base_parser.ts +45 -0
- package/src/base_serializer.ts +9 -0
- package/src/boolean_parser.ts +60 -0
- package/src/boolean_serializer.ts +32 -0
- package/src/clause_types.ts +160 -0
- package/src/date_parser.ts +413 -0
- package/src/date_serializer.ts +114 -0
- package/src/filter_parser.ts +68 -0
- package/src/filter_serializer.ts +49 -0
- package/src/filter_types.ts +12 -0
- package/src/generate_samples.ts +387 -0
- package/src/number_parser.ts +308 -0
- package/src/number_serializer.ts +96 -0
- package/src/string_parser.ts +193 -0
- package/src/string_serializer.ts +87 -0
- package/src/token_types.ts +7 -0
- package/src/tokenizer.spec.ts +273 -0
- package/src/tokenizer.ts +320 -0
- package/tsconfig.json +14 -0
package/src/tokenizer.ts
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import {Token} from './token_types';
|
|
2
|
+
|
|
3
|
+
// Describes one pattern the tokenizer should recognize and the token type to
// emit for it. Used both for raw-substring matching (first pass) and for
// whole-word matching (second pass).
export interface SpecialToken {
  type: string; // Token type emitted when this pattern matches.
  value: string | RegExp; // Literal string or pattern to match against the input.
  ignoreCase?: boolean; // This is only applicable for typeof value == string. It is ignored for RegExp.
}
|
|
8
|
+
|
|
9
|
+
// A SpecialToken together with the concrete input text that matched it.
interface SpecialTokenMatch extends SpecialToken {
  matchedString: string; // The actual string in the input that matched SpecialToken.value.
}
|
|
12
|
+
|
|
13
|
+
// Configuration for a Tokenizer instance.
export interface TokenizerParams {
  specialSubstrings: SpecialToken[]; // Applied to the raw string on the first pass.
  specialWords: SpecialToken[]; // Applied to the 'word' tokens as a second pass.
  combineAdjacentWords?: boolean; // Merge runs of adjacent 'word' tokens into one token.
  splitOnWhitespace?: boolean; // Treat whitespace as a word separator.
  trimWordWhitespace?: boolean; // Trim surrounding whitespace from each word's value.
  separators?: RegExp; // NOTE(review): never referenced in this file — confirm whether it is used elsewhere or dead.
}
|
|
21
|
+
|
|
22
|
+
export class Tokenizer {
|
|
23
|
+
private input: string;
|
|
24
|
+
private index: number;
|
|
25
|
+
private specialSubstrings: SpecialToken[]; // Applied to the raw string on the first pass.
|
|
26
|
+
private specialWords: SpecialToken[]; // Applied to the 'word' tokens as a second pass.
|
|
27
|
+
private params: TokenizerParams;
|
|
28
|
+
|
|
29
|
+
constructor(input: string, params: TokenizerParams) {
|
|
30
|
+
this.index = 0;
|
|
31
|
+
this.specialSubstrings = params.specialSubstrings;
|
|
32
|
+
this.specialWords = params.specialWords;
|
|
33
|
+
this.params = params;
|
|
34
|
+
this.input = input;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
private tokenize(): Token[] {
|
|
38
|
+
let tokens: Token[] = [];
|
|
39
|
+
let wordStart = -1;
|
|
40
|
+
let wordEnd = -1;
|
|
41
|
+
let special: SpecialTokenMatch | undefined;
|
|
42
|
+
while (this.hasMoreInput()) {
|
|
43
|
+
if (this.params.splitOnWhitespace && this.isWhitespace(this.index)) {
|
|
44
|
+
this.maybeConsumeWord(wordStart, wordEnd, tokens);
|
|
45
|
+
wordStart = -1;
|
|
46
|
+
this.consumeWhitespace();
|
|
47
|
+
} else if (this.input[this.index] === '\\') {
|
|
48
|
+
if (wordStart === -1) {
|
|
49
|
+
wordStart = this.index;
|
|
50
|
+
}
|
|
51
|
+
wordEnd = this.index;
|
|
52
|
+
this.index++;
|
|
53
|
+
if (this.hasMoreInput()) {
|
|
54
|
+
// Unless backslash is at the end, handle next char.
|
|
55
|
+
wordEnd = this.index;
|
|
56
|
+
this.index++;
|
|
57
|
+
}
|
|
58
|
+
} else if ((special = this.isSpecialSubstring())) {
|
|
59
|
+
this.maybeConsumeWord(wordStart, wordEnd, tokens);
|
|
60
|
+
wordStart = -1;
|
|
61
|
+
this.consumeSpecialSubstring(special, tokens);
|
|
62
|
+
} else {
|
|
63
|
+
if (wordStart === -1) {
|
|
64
|
+
wordStart = this.index;
|
|
65
|
+
}
|
|
66
|
+
wordEnd = this.index;
|
|
67
|
+
this.index++;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
this.maybeConsumeWord(wordStart, wordEnd, tokens);
|
|
71
|
+
|
|
72
|
+
tokens = Tokenizer.convertSpecialWords(tokens, this.specialWords);
|
|
73
|
+
|
|
74
|
+
if (this.params.combineAdjacentWords) {
|
|
75
|
+
tokens = Tokenizer.combineAdjacentWords(tokens);
|
|
76
|
+
}
|
|
77
|
+
if (this.params.trimWordWhitespace) {
|
|
78
|
+
tokens = Tokenizer.trimWordWhitespace(tokens);
|
|
79
|
+
}
|
|
80
|
+
return tokens;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
public parse(): Token[] {
|
|
84
|
+
let tokens = this.tokenize();
|
|
85
|
+
if (this.params.trimWordWhitespace) {
|
|
86
|
+
tokens = Tokenizer.trimWordWhitespace(tokens);
|
|
87
|
+
}
|
|
88
|
+
return tokens;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
private hasMoreInput(): boolean {
|
|
92
|
+
return this.index < this.input.length;
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
private isWhitespace(idx: number): boolean {
|
|
96
|
+
return /\s/.test(this.input[idx]);
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
private consumeWhitespace(): void {
|
|
100
|
+
while (this.hasMoreInput() && this.isWhitespace(this.index)) {
|
|
101
|
+
this.index++;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
private maybeConsumeWord(
|
|
105
|
+
wordStart: number,
|
|
106
|
+
wordEnd: number,
|
|
107
|
+
tokens: Token[]
|
|
108
|
+
): void {
|
|
109
|
+
if (wordStart >= 0 && wordEnd >= wordStart) {
|
|
110
|
+
tokens.push({
|
|
111
|
+
type: 'word',
|
|
112
|
+
value: this.input.substring(wordStart, wordEnd + 1),
|
|
113
|
+
startIndex: wordStart,
|
|
114
|
+
endIndex: wordEnd + 1,
|
|
115
|
+
});
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
private isSpecialSubstring(): SpecialTokenMatch | undefined {
|
|
120
|
+
for (const special of this.specialSubstrings) {
|
|
121
|
+
if (special.value instanceof RegExp) {
|
|
122
|
+
const shifted = this.input.substring(this.index); // create a substring starting at index.
|
|
123
|
+
const matcher = special.value.exec(shifted);
|
|
124
|
+
if (matcher) {
|
|
125
|
+
return {
|
|
126
|
+
type: special.type,
|
|
127
|
+
value: special.value,
|
|
128
|
+
matchedString: matcher[0],
|
|
129
|
+
};
|
|
130
|
+
}
|
|
131
|
+
} else {
|
|
132
|
+
const subString = this.input.slice(
|
|
133
|
+
this.index,
|
|
134
|
+
this.index + special.value.length
|
|
135
|
+
);
|
|
136
|
+
const matches = special.ignoreCase
|
|
137
|
+
? subString.toLowerCase() === special.value.toLowerCase()
|
|
138
|
+
: subString === special.value;
|
|
139
|
+
if (matches) {
|
|
140
|
+
const value = special.ignoreCase
|
|
141
|
+
? subString.toUpperCase()
|
|
142
|
+
: subString;
|
|
143
|
+
return {
|
|
144
|
+
type: special.type,
|
|
145
|
+
value: special.value,
|
|
146
|
+
matchedString: value,
|
|
147
|
+
};
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
return undefined;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
private consumeSpecialSubstring(
|
|
155
|
+
special: SpecialTokenMatch,
|
|
156
|
+
tokens: Token[]
|
|
157
|
+
): void {
|
|
158
|
+
tokens.push({
|
|
159
|
+
type: special.type,
|
|
160
|
+
value: special.matchedString,
|
|
161
|
+
startIndex: this.index,
|
|
162
|
+
endIndex: this.index + special.matchedString.length,
|
|
163
|
+
});
|
|
164
|
+
this.index += special.matchedString.length;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
public static combineAdjacentWords(tokens: Token[]): Token[] {
|
|
168
|
+
const output: Token[] = [];
|
|
169
|
+
let previousToken: Token | undefined = undefined;
|
|
170
|
+
for (let i = 0; i < tokens.length; i++) {
|
|
171
|
+
const currentToken: Token = tokens[i];
|
|
172
|
+
if (
|
|
173
|
+
currentToken.type === 'word' &&
|
|
174
|
+
previousToken &&
|
|
175
|
+
previousToken.type === 'word'
|
|
176
|
+
) {
|
|
177
|
+
previousToken.value += currentToken.value;
|
|
178
|
+
previousToken.endIndex = currentToken.endIndex;
|
|
179
|
+
} else {
|
|
180
|
+
output.push(currentToken);
|
|
181
|
+
}
|
|
182
|
+
previousToken = currentToken;
|
|
183
|
+
}
|
|
184
|
+
return output;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
public static trimWordWhitespace(tokens: Token[]): Token[] {
|
|
188
|
+
const output: Token[] = [];
|
|
189
|
+
for (const token of tokens) {
|
|
190
|
+
if (token.type === 'word') {
|
|
191
|
+
token.value = token.value.trim();
|
|
192
|
+
}
|
|
193
|
+
output.push(token);
|
|
194
|
+
}
|
|
195
|
+
return output;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
public static isSpecialWord(
|
|
199
|
+
token: Token,
|
|
200
|
+
specials: SpecialToken[]
|
|
201
|
+
): Token | undefined {
|
|
202
|
+
for (const special of specials) {
|
|
203
|
+
if (special.value instanceof RegExp) {
|
|
204
|
+
const regexp = special.value;
|
|
205
|
+
regexp.lastIndex = 0; // Set the starting index for the search
|
|
206
|
+
if (regexp.test(token.value)) {
|
|
207
|
+
const value = special.ignoreCase
|
|
208
|
+
? token.value.toUpperCase()
|
|
209
|
+
: token.value;
|
|
210
|
+
return {
|
|
211
|
+
type: special.type,
|
|
212
|
+
value: value,
|
|
213
|
+
startIndex: token.startIndex,
|
|
214
|
+
endIndex: token.endIndex,
|
|
215
|
+
};
|
|
216
|
+
}
|
|
217
|
+
} else {
|
|
218
|
+
const matches = special.ignoreCase
|
|
219
|
+
? token.value.toLowerCase() === special.value.toLowerCase()
|
|
220
|
+
: token.value === special.value;
|
|
221
|
+
if (matches) {
|
|
222
|
+
const value = special.ignoreCase
|
|
223
|
+
? token.value.toUpperCase()
|
|
224
|
+
: token.value;
|
|
225
|
+
return {
|
|
226
|
+
type: special.type,
|
|
227
|
+
value: value,
|
|
228
|
+
startIndex: token.startIndex,
|
|
229
|
+
endIndex: token.endIndex,
|
|
230
|
+
};
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
return undefined;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
public static convertSpecialWords(
|
|
238
|
+
tokens: Token[],
|
|
239
|
+
specials: SpecialToken[]
|
|
240
|
+
): Token[] {
|
|
241
|
+
const output: Token[] = [];
|
|
242
|
+
let special: Token | undefined = undefined;
|
|
243
|
+
for (const token of tokens) {
|
|
244
|
+
if (
|
|
245
|
+
token.type === 'word' &&
|
|
246
|
+
(special = Tokenizer.isSpecialWord(token, specials))
|
|
247
|
+
) {
|
|
248
|
+
output.push(special);
|
|
249
|
+
} else {
|
|
250
|
+
output.push(token);
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
return output;
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
/**
|
|
257
|
+
* Checks if the token types starting at the given index match the types in the input string.
|
|
258
|
+
*
|
|
259
|
+
* @param types - A string of types separated by '|'. Eg 'TYPEA|TYPEB|TYPEC'
|
|
260
|
+
* @param tokens - An array of tokens.
|
|
261
|
+
* @param index - The index into the token array to start checking from.
|
|
262
|
+
* @returns True if the token types match, false otherwise.
|
|
263
|
+
*/
|
|
264
|
+
public static matchTypes(
|
|
265
|
+
types: string,
|
|
266
|
+
tokens: Token[],
|
|
267
|
+
index: number
|
|
268
|
+
): Token[] | undefined {
|
|
269
|
+
const typeArray = types.split('|');
|
|
270
|
+
if (index < 0 || index + typeArray.length > tokens.length) {
|
|
271
|
+
return undefined;
|
|
272
|
+
}
|
|
273
|
+
// Iterate over the types and check if they match the token types
|
|
274
|
+
for (let i = 0; i < typeArray.length; i++) {
|
|
275
|
+
if (index + i >= tokens.length) {
|
|
276
|
+
return undefined;
|
|
277
|
+
}
|
|
278
|
+
if (tokens[index + i].type !== typeArray[i]) {
|
|
279
|
+
return undefined;
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
return tokens.slice(index, index + typeArray.length);
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
/**
|
|
286
|
+
* Merges tokens that match a given type string into a single token with a specified merge type.
|
|
287
|
+
*
|
|
288
|
+
* @param types - A string of types separated by '|'.
|
|
289
|
+
* @param tokens - An array of tokens.
|
|
290
|
+
* @param mergeType - The type to use for the merged token.
|
|
291
|
+
* @returns The updated token list with merged tokens.
|
|
292
|
+
*/
|
|
293
|
+
public static mergeTypes(
|
|
294
|
+
types: string,
|
|
295
|
+
tokens: Token[],
|
|
296
|
+
mergeType: string
|
|
297
|
+
): Token[] {
|
|
298
|
+
const output: Token[] = [];
|
|
299
|
+
let i = 0;
|
|
300
|
+
while (i < tokens.length) {
|
|
301
|
+
// Check if the current token matches the type string
|
|
302
|
+
const matchedTokens = Tokenizer.matchTypes(types, tokens, i);
|
|
303
|
+
if (matchedTokens && matchedTokens.length > 0) {
|
|
304
|
+
const mergedToken: Token = {
|
|
305
|
+
type: mergeType,
|
|
306
|
+
value: '',
|
|
307
|
+
values: matchedTokens,
|
|
308
|
+
startIndex: matchedTokens[0].startIndex,
|
|
309
|
+
endIndex: matchedTokens[matchedTokens.length - 1].endIndex,
|
|
310
|
+
};
|
|
311
|
+
output.push(mergedToken);
|
|
312
|
+
i += matchedTokens.length;
|
|
313
|
+
} else {
|
|
314
|
+
output.push(tokens[i]);
|
|
315
|
+
i++;
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
return output;
|
|
319
|
+
}
|
|
320
|
+
}
|
package/tsconfig.json
ADDED