tarsec 0.4.4 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/parsers.d.ts CHANGED
@@ -36,6 +36,84 @@ export declare function oneOf(chars: string): Parser<string>;
36
36
  * @returns - parser that parses a character that is not in the given string
37
37
  */
38
38
  export declare function noneOf(chars: string): Parser<string>;
39
+ /**
40
+ * Predicate over a single UTF-16 code unit (the value returned by
41
+ * `String.prototype.charCodeAt`). Used by `takeWhile` / `takeWhile1`
42
+ * to test each input code unit; a character outside the BMP arrives as two
43
+ * separate surrogate values. Return `true` to keep consuming, `false` to stop.
44
+ *
45
+ * ```ts
46
+ * const isDigit: CharPredicate = (code) => code >= 0x30 && code <= 0x39; // "0".."9"
47
+ * ```
48
+ */
49
+ export type CharPredicate = (code: number) => boolean;
50
+ /**
51
+ * Consume the longest prefix of `input` whose UTF-16 code units all satisfy
52
+ * the predicate (or are contained in the given char-class string). Always
53
+ * succeeds; returns "" when nothing matches.
54
+ *
55
+ * This is the fast equivalent of `manyWithJoin(oneOf(chars))` for
56
+ * character-class scans (identifiers, digit runs, whitespace, etc.). It
57
+ * runs a tight `charCodeAt` loop and returns the match as one `slice`, avoiding
58
+ * the per-char string allocations and final `Array.join` of the
59
+ * combinator form.
60
+ *
61
+ * Useful for scanning runs of characters that don't form their own
62
+ * grammar — whitespace, identifier bodies, hex digit runs, etc.:
63
+ *
64
+ * ```ts
65
+ * // Skip any spaces/tabs without per-character allocations.
66
+ * const inlineWs = takeWhile(" \t");
67
+ *
68
+ * // Scan an identifier body (caller already matched the first char).
69
+ * const identTail = takeWhile(
70
+ * "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
71
+ * );
72
+ *
73
+ * // Predicate form: anything that isn't a quote (0x22) or backslash (0x5c).
74
+ * const stringText = takeWhile(
75
+ * (code) => code !== 0x22 && code !== 0x5c
76
+ * );
77
+ * ```
78
+ *
79
+ * @param charsOrPred - a string of allowed characters, or a `CharPredicate`
80
+ * @returns - a parser that consumes the matching prefix and always succeeds (the result is "" when nothing matches)
81
+ */
82
+ export declare function takeWhile(charsOrPred: string | CharPredicate): Parser<string>;
83
+ /**
84
+ * Like `takeWhile`, but requires at least one matching character. On
85
+ * empty/no-match input, fails without consuming and records a rightmost-
86
+ * failure tagged with `expected` so error messages stay meaningful.
87
+ *
88
+ * Use this for things like identifier scanning, where an empty result
89
+ * is a parse error rather than a valid match:
90
+ *
91
+ * ```ts
92
+ * const VAR_CHARS =
93
+ * "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
94
+ *
95
+ * // Replaces the slower `many1WithJoin(varNameChar)`.
96
+ * const identifier = takeWhile1(VAR_CHARS, "an identifier");
97
+ *
98
+ * identifier("foo + bar"); // success("foo", " + bar")
99
+ * identifier(" hi"); // failure: "expected an identifier"
100
+ *
101
+ * // Predicate form with a custom error message.
102
+ * const hexRun = takeWhile1(
103
+ * (code) =>
104
+ * (code >= 0x30 && code <= 0x39) || // 0-9
105
+ * (code >= 0x41 && code <= 0x46) || // A-F
106
+ * (code >= 0x61 && code <= 0x66), // a-f
107
+ * "a hex digit",
108
+ * );
109
+ * ```
110
+ *
111
+ * @param charsOrPred - a string of allowed characters, or a `CharPredicate`
112
+ * @param expected - optional human-readable label used in error messages
113
+ * (defaults to `one of "<chars>"` for string inputs and to `<predicate>` for predicate inputs)
114
+ * @returns - a parser that consumes the matching prefix and fails with `expected <label>` if empty
115
+ */
116
+ export declare function takeWhile1(charsOrPred: string | CharPredicate, expected?: string): Parser<string>;
39
117
  /**
40
118
  * A parser that parses any one character.
41
119
  * Fails on empty strings, succeeds otherwise.
package/dist/parsers.js CHANGED
@@ -93,6 +93,136 @@ export function noneOf(chars) {
93
93
  return char(input[0])(input);
94
94
  });
95
95
  }
96
+ /**
97
+ * Compile a string of allowed characters or a user-supplied predicate
98
+ * into a fast `CharPredicate`. For string inputs whose UTF-16 code units
99
+ * are all ASCII (< 128), the result is a single 128-byte
100
+ * `Uint8Array` lookup — one array read per character test. Code units
101
+ * >= 128 in the string (including each half of a surrogate pair) go into a `Set<number>`.
102
+ * Predicates pass through unchanged.
103
+ *
104
+ * @param charsOrPred - a string of allowed characters or a predicate function
105
+ * @returns - a `CharPredicate` suitable for use in tight scanning loops
106
+ */
107
+ function compileCharPredicate(charsOrPred) {
108
+ if (typeof charsOrPred === "function")
109
+ return charsOrPred; // caller-supplied predicate: used as-is, no wrapping
110
+ const chars = charsOrPred;
111
+ const ascii = new Uint8Array(128); // ascii[c] === 1 marks code unit c (< 128) as allowed
112
+ let nonAscii = null; // stays null unless a code unit >= 128 is seen
113
+ for (let i = 0; i < chars.length; i++) {
114
+ const code = chars.charCodeAt(i);
115
+ if (code < 128) {
116
+ ascii[code] = 1;
117
+ }
118
+ else {
119
+ (nonAscii !== null && nonAscii !== void 0 ? nonAscii : (nonAscii = new Set())).add(code); // same as (nonAscii ??= new Set()).add(code): Set is created on first use
120
+ }
121
+ }
122
+ if (nonAscii === null) {
123
+ return (code) => code < 128 && ascii[code] === 1; // ASCII-only class: the `code < 128` guard keeps the read inside the table
124
+ }
125
+ const set = nonAscii;
126
+ return (code) => code < 128 ? ascii[code] === 1 : set.has(code); // mixed class: table for ASCII, Set for everything else
127
+ }
128
+ /**
129
+ * Consume the longest prefix of `input` whose UTF-16 code units all satisfy
130
+ * the predicate (or are contained in the given char-class string). Always
131
+ * succeeds; returns "" when nothing matches.
132
+ *
133
+ * This is the fast equivalent of `manyWithJoin(oneOf(chars))` for
134
+ * character-class scans (identifiers, digit runs, whitespace, etc.). It
135
+ * runs a tight `charCodeAt` loop and returns the match as one `slice`, avoiding
136
+ * the per-char string allocations and final `Array.join` of the
137
+ * combinator form.
138
+ *
139
+ * Useful for scanning runs of characters that don't form their own
140
+ * grammar — whitespace, identifier bodies, hex digit runs, etc.:
141
+ *
142
+ * ```ts
143
+ * // Skip any spaces/tabs without per-character allocations.
144
+ * const inlineWs = takeWhile(" \t");
145
+ *
146
+ * // Scan an identifier body (caller already matched the first char).
147
+ * const identTail = takeWhile(
148
+ * "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
149
+ * );
150
+ *
151
+ * // Predicate form: anything that isn't a quote (0x22) or backslash (0x5c).
152
+ * const stringText = takeWhile(
153
+ * (code) => code !== 0x22 && code !== 0x5c
154
+ * );
155
+ * ```
156
+ *
157
+ * @param charsOrPred - a string of allowed characters, or a `CharPredicate`
158
+ * @returns - a parser that consumes the matching prefix and always succeeds (the result is "" when nothing matches)
159
+ */
160
+ export function takeWhile(charsOrPred) {
161
+ const pred = compileCharPredicate(charsOrPred); // compiled once here, not on every parse
162
+ const label = typeof charsOrPred === "string" // name handed to `trace`; presumably shown in debug traces — confirm against `trace`
163
+ ? `takeWhile(${escape(charsOrPred)})`
164
+ : "takeWhile(<predicate>)";
165
+ return trace(label, (input) => {
166
+ let i = 0;
167
+ const n = input.length;
168
+ while (i < n && pred(input.charCodeAt(i))) // stop at end of input or at the first code unit `pred` rejects
169
+ i++;
170
+ return success(input.slice(0, i), input.slice(i)); // i may be 0: the match is "" and the whole input is passed on
171
+ });
172
+ }
173
+ /**
174
+ * Like `takeWhile`, but requires at least one matching character. On
175
+ * empty/no-match input, fails without consuming and records a rightmost-
176
+ * failure tagged with `expected` so error messages stay meaningful.
177
+ *
178
+ * Use this for things like identifier scanning, where an empty result
179
+ * is a parse error rather than a valid match:
180
+ *
181
+ * ```ts
182
+ * const VAR_CHARS =
183
+ * "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
184
+ *
185
+ * // Replaces the slower `many1WithJoin(varNameChar)`.
186
+ * const identifier = takeWhile1(VAR_CHARS, "an identifier");
187
+ *
188
+ * identifier("foo + bar"); // success("foo", " + bar")
189
+ * identifier(" hi"); // failure: "expected an identifier"
190
+ *
191
+ * // Predicate form with a custom error message.
192
+ * const hexRun = takeWhile1(
193
+ * (code) =>
194
+ * (code >= 0x30 && code <= 0x39) || // 0-9
195
+ * (code >= 0x41 && code <= 0x46) || // A-F
196
+ * (code >= 0x61 && code <= 0x66), // a-f
197
+ * "a hex digit",
198
+ * );
199
+ * ```
200
+ *
201
+ * @param charsOrPred - a string of allowed characters, or a `CharPredicate`
202
+ * @param expected - optional human-readable label used in error messages
203
+ * (defaults to `one of "<chars>"` for string inputs and to `<predicate>` for predicate inputs)
204
+ * @returns - a parser that consumes the matching prefix and fails with `expected <label>` if empty
205
+ */
206
+ export function takeWhile1(charsOrPred, expected) {
207
+ const pred = compileCharPredicate(charsOrPred); // compiled once here, not on every parse
208
+ const desc = expected !== null && expected !== void 0 ? expected : (typeof charsOrPred === "string" // default applies only when `expected` is null/undefined
209
+ ? `one of "${charsOrPred}"`
210
+ : "<predicate>"); // NOTE(review): the default interpolates the chars raw, while `label` below passes them through `escape` — confirm this is intended
211
+ const label = typeof charsOrPred === "string" // name handed to `trace`; presumably shown in debug traces — confirm against `trace`
212
+ ? `takeWhile1(${escape(charsOrPred)})`
213
+ : "takeWhile1(<predicate>)";
214
+ return trace(label, (input) => {
215
+ let i = 0;
216
+ const n = input.length;
217
+ while (i < n && pred(input.charCodeAt(i))) // stop at end of input or at the first code unit `pred` rejects
218
+ i++;
219
+ if (i === 0) { // nothing matched; this also covers empty input
220
+ recordFailure(input, desc); // presumably feeds the rightmost-failure tracking mentioned above — confirm against `recordFailure`
221
+ return failure(`expected ${desc}`, input); // the original input is handed back unchanged
222
+ }
223
+ return success(input.slice(0, i), input.slice(i));
224
+ });
225
+ }
96
226
  /**
97
227
  * A parser that parses any one character.
98
228
  * Fails on empty strings, succeeds otherwise.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tarsec",
3
- "version": "0.4.4",
3
+ "version": "0.4.5",
4
4
  "description": "A parser combinator library for TypeScript, inspired by Parsec.",
5
5
  "homepage": "https://github.com/egonSchiele/tarsec",
6
6
  "scripts": {
@@ -43,4 +43,4 @@
43
43
  "typescript": "^5.4.2",
44
44
  "vitest": "^1.4.0"
45
45
  }
46
- }
46
+ }