tarsec 0.4.4 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,8 @@ import { CaptureParser, GeneralParser, InferManyReturnType, MergedCaptures, Merg
7
7
  * { captures: <array of captures> }
8
8
  * ```
9
9
  *
10
+ * If you're parsing characters only, prefer `takeWhile` for performance reasons.
11
+ *
10
12
  * @param parser - parser to run
11
13
  * @returns - parser that runs the given parser zero to many times,
12
14
  * and returns the result as an array
@@ -14,6 +16,7 @@ import { CaptureParser, GeneralParser, InferManyReturnType, MergedCaptures, Merg
14
16
  export declare function many<const T extends GeneralParser<any, any>>(parser: T): InferManyReturnType<T>;
15
17
  /**
16
18
  * Same as `many`, but fails if the parser doesn't match at least once.
19
+ * If you're parsing characters only, prefer `takeWhile1` for performance reasons.
17
20
  *
18
21
  * @param parser - parser to run
19
22
  * @returns a parser that runs the given parser one to many times,
@@ -11,6 +11,8 @@ import { escape } from "./utils.js";
11
11
  * { captures: <array of captures> }
12
12
  * ```
13
13
  *
14
+ * If you're parsing characters only, prefer `takeWhile` for performance reasons.
15
+ *
14
16
  * @param parser - parser to run
15
17
  * @returns - parser that runs the given parser zero to many times,
16
18
  * and returns the result as an array
@@ -50,6 +52,7 @@ export function many(parser) {
50
52
  }
51
53
  /**
52
54
  * Same as `many`, but fails if the parser doesn't match at least once.
55
+ * If you're parsing characters only, prefer `takeWhile1` for performance reasons.
53
56
  *
54
57
  * @param parser - parser to run
55
58
  * @returns a parser that runs the given parser one to many times,
package/dist/parsers.d.ts CHANGED
@@ -36,6 +36,86 @@ export declare function oneOf(chars: string): Parser<string>;
36
36
  * @returns - parser that parses a character that is not in the given string
37
37
  */
38
38
  export declare function noneOf(chars: string): Parser<string>;
39
+ /**
40
+ * Predicate over a single UTF-16 code unit (the value returned by
41
+ * `String.prototype.charCodeAt`). Used by `takeWhile` / `takeWhile1`
42
+ * to test each input character. Return `true` to keep consuming, `false`
43
+ * to stop.
44
+ *
45
+ * ```ts
46
+ * const isDigit: CharPredicate = (code) => code >= 0x30 && code <= 0x39;
47
+ * ```
48
+ */
49
+ export type CharPredicate = (code: number) => boolean;
50
+ /**
51
+ * A faster version of `many`/`manyWithJoin` for character-class scanning.
52
+ *
53
+ * Consume the longest prefix of `input` whose characters all satisfy
54
+ * the predicate (or are contained in the given char-class string). Always
55
+ * succeeds; returns "" when nothing matches.
56
+ *
57
+ * This is the fast equivalent of `manyWithJoin(oneOf(chars))` for
58
+ * character-class scans (identifiers, digit runs, whitespace, etc.). It
59
+ * runs a tight `charCodeAt` loop and returns a single `slice`, avoiding
60
+ * the per-char string allocations and final `Array.join` of the
61
+ * combinator form.
62
+ *
63
+ * Useful for scanning runs of characters that don't form their own
64
+ * grammar — whitespace, identifier bodies, hex digit runs, etc.:
65
+ *
66
+ * ```ts
67
+ * // Skip any spaces/tabs without per-character allocations.
68
+ * const inlineWs = takeWhile(" \t");
69
+ *
70
+ * // Scan an identifier body (caller already matched the first char).
71
+ * const identTail = takeWhile(
72
+ * "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
73
+ * );
74
+ *
75
+ * // Predicate form: anything that isn't a quote or backslash.
76
+ * const stringText = takeWhile(
77
+ * (code) => code !== 0x22 && code !== 0x5c
78
+ * );
79
+ * ```
80
+ *
81
+ * @param charsOrPred - a string of allowed characters, or a `CharPredicate`
82
+ * @returns - a parser that consumes the matching prefix and always succeeds
83
+ */
84
+ export declare function takeWhile(charsOrPred: string | CharPredicate): Parser<string>;
85
+ /**
86
+ * Like `takeWhile`, but requires at least one matching character. On
87
+ * empty/no-match input, fails without consuming and records a rightmost-
88
+ * failure tagged with `expected` so error messages stay meaningful.
89
+ *
90
+ * Use this for things like identifier scanning, where an empty result
91
+ * is a parse error rather than a valid match:
92
+ *
93
+ * ```ts
94
+ * const VAR_CHARS =
95
+ * "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
96
+ *
97
+ * // Replaces the slower `many1WithJoin(varNameChar)`.
98
+ * const identifier = takeWhile1(VAR_CHARS, "an identifier");
99
+ *
100
+ * identifier("foo + bar"); // success("foo", " + bar")
101
+ * identifier(" hi"); // failure: "expected an identifier"
102
+ *
103
+ * // Predicate form with a custom error message.
104
+ * const hexRun = takeWhile1(
105
+ * (code) =>
106
+ * (code >= 0x30 && code <= 0x39) || // 0-9
107
+ * (code >= 0x41 && code <= 0x46) || // A-F
108
+ * (code >= 0x61 && code <= 0x66), // a-f
109
+ * "a hex digit",
110
+ * );
111
+ * ```
112
+ *
113
+ * @param charsOrPred - a string of allowed characters, or a `CharPredicate`
114
+ * @param expected - optional human-readable label used in error messages
115
+ * (defaults to `one of "<chars>"` for string inputs, `<predicate>` for predicate inputs)
116
+ * @returns - a parser that consumes the matching prefix and fails if empty
117
+ */
118
+ export declare function takeWhile1(charsOrPred: string | CharPredicate, expected?: string): Parser<string>;
39
119
  /**
40
120
  * A parser that parses any one character.
41
121
  * Fails on empty strings, succeeds otherwise.
package/dist/parsers.js CHANGED
@@ -93,6 +93,138 @@ export function noneOf(chars) {
93
93
  return char(input[0])(input);
94
94
  });
95
95
  }
96
+ /**
97
+ * Compile a string of allowed characters or a user-supplied predicate
98
+ * into a fast `CharPredicate`. For string inputs whose characters are
99
+ * all ASCII (code points < 128), the result is a single 128-byte
100
+ * `Uint8Array` lookup — one array read per character test. Non-ASCII
101
+ * characters in the string fall back to a `Set<number>` check.
102
+ * Predicates pass through unchanged.
103
+ *
104
+ * @param charsOrPred - a string of allowed characters or a predicate function
105
+ * @returns - a `CharPredicate` suitable for use in tight scanning loops
106
+ */
107
+ function compileCharPredicate(charsOrPred) {
108
+ if (typeof charsOrPred === "function")
109
+ return charsOrPred;
110
+ const chars = charsOrPred;
111
+ const ascii = new Uint8Array(128);
112
+ let nonAscii = null;
113
+ for (let i = 0; i < chars.length; i++) {
114
+ const code = chars.charCodeAt(i);
115
+ if (code < 128) {
116
+ ascii[code] = 1;
117
+ }
118
+ else {
119
+ (nonAscii !== null && nonAscii !== void 0 ? nonAscii : (nonAscii = new Set())).add(code);
120
+ }
121
+ }
122
+ if (nonAscii === null) {
123
+ return (code) => code < 128 && ascii[code] === 1;
124
+ }
125
+ const set = nonAscii;
126
+ return (code) => code < 128 ? ascii[code] === 1 : set.has(code);
127
+ }
128
+ /**
129
+ * A faster version of `many`/`manyWithJoin` for character-class scanning.
130
+ *
131
+ * Consume the longest prefix of `input` whose characters all satisfy
132
+ * the predicate (or are contained in the given char-class string). Always
133
+ * succeeds; returns "" when nothing matches.
134
+ *
135
+ * This is the fast equivalent of `manyWithJoin(oneOf(chars))` for
136
+ * character-class scans (identifiers, digit runs, whitespace, etc.). It
137
+ * runs a tight `charCodeAt` loop and returns a single `slice`, avoiding
138
+ * the per-char string allocations and final `Array.join` of the
139
+ * combinator form.
140
+ *
141
+ * Useful for scanning runs of characters that don't form their own
142
+ * grammar — whitespace, identifier bodies, hex digit runs, etc.:
143
+ *
144
+ * ```ts
145
+ * // Skip any spaces/tabs without per-character allocations.
146
+ * const inlineWs = takeWhile(" \t");
147
+ *
148
+ * // Scan an identifier body (caller already matched the first char).
149
+ * const identTail = takeWhile(
150
+ * "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
151
+ * );
152
+ *
153
+ * // Predicate form: anything that isn't a quote or backslash.
154
+ * const stringText = takeWhile(
155
+ * (code) => code !== 0x22 && code !== 0x5c
156
+ * );
157
+ * ```
158
+ *
159
+ * @param charsOrPred - a string of allowed characters, or a `CharPredicate`
160
+ * @returns - a parser that consumes the matching prefix and always succeeds
161
+ */
162
+ export function takeWhile(charsOrPred) {
163
+ const pred = compileCharPredicate(charsOrPred);
164
+ const label = typeof charsOrPred === "string"
165
+ ? `takeWhile(${escape(charsOrPred)})`
166
+ : "takeWhile(<predicate>)";
167
+ return trace(label, (input) => {
168
+ let i = 0;
169
+ const n = input.length;
170
+ while (i < n && pred(input.charCodeAt(i)))
171
+ i++;
172
+ return success(input.slice(0, i), input.slice(i));
173
+ });
174
+ }
175
+ /**
176
+ * Like `takeWhile`, but requires at least one matching character. On
177
+ * empty/no-match input, fails without consuming and records a rightmost-
178
+ * failure tagged with `expected` so error messages stay meaningful.
179
+ *
180
+ * Use this for things like identifier scanning, where an empty result
181
+ * is a parse error rather than a valid match:
182
+ *
183
+ * ```ts
184
+ * const VAR_CHARS =
185
+ * "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
186
+ *
187
+ * // Replaces the slower `many1WithJoin(varNameChar)`.
188
+ * const identifier = takeWhile1(VAR_CHARS, "an identifier");
189
+ *
190
+ * identifier("foo + bar"); // success("foo", " + bar")
191
+ * identifier(" hi"); // failure: "expected an identifier"
192
+ *
193
+ * // Predicate form with a custom error message.
194
+ * const hexRun = takeWhile1(
195
+ * (code) =>
196
+ * (code >= 0x30 && code <= 0x39) || // 0-9
197
+ * (code >= 0x41 && code <= 0x46) || // A-F
198
+ * (code >= 0x61 && code <= 0x66), // a-f
199
+ * "a hex digit",
200
+ * );
201
+ * ```
202
+ *
203
+ * @param charsOrPred - a string of allowed characters, or a `CharPredicate`
204
+ * @param expected - optional human-readable label used in error messages
205
+ * (defaults to `one of "<chars>"` for string inputs, `<predicate>` for predicate inputs)
206
+ * @returns - a parser that consumes the matching prefix and fails if empty
207
+ */
208
+ export function takeWhile1(charsOrPred, expected) {
209
+ const pred = compileCharPredicate(charsOrPred);
210
+ const desc = expected !== null && expected !== void 0 ? expected : (typeof charsOrPred === "string"
211
+ ? `one of "${charsOrPred}"`
212
+ : "<predicate>");
213
+ const label = typeof charsOrPred === "string"
214
+ ? `takeWhile1(${escape(charsOrPred)})`
215
+ : "takeWhile1(<predicate>)";
216
+ return trace(label, (input) => {
217
+ let i = 0;
218
+ const n = input.length;
219
+ while (i < n && pred(input.charCodeAt(i)))
220
+ i++;
221
+ if (i === 0) {
222
+ recordFailure(input, desc);
223
+ return failure(`expected ${desc}`, input);
224
+ }
225
+ return success(input.slice(0, i), input.slice(i));
226
+ });
227
+ }
96
228
  /**
97
229
  * A parser that parses any one character.
98
230
  * Fails on empty strings, succeeds otherwise.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tarsec",
3
- "version": "0.4.4",
3
+ "version": "0.4.6",
4
4
  "description": "A parser combinator library for TypeScript, inspired by Parsec.",
5
5
  "homepage": "https://github.com/egonSchiele/tarsec",
6
6
  "scripts": {
@@ -43,4 +43,4 @@
43
43
  "typescript": "^5.4.2",
44
44
  "vitest": "^1.4.0"
45
45
  }
46
- }
46
+ }