@sprig-and-prose/prose-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/biome.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
3
+ "organizeImports": {
4
+ "enabled": true
5
+ },
6
+ "linter": {
7
+ "enabled": true,
8
+ "rules": {
9
+ "recommended": true
10
+ }
11
+ },
12
+ "formatter": {
13
+ "enabled": true,
14
+ "indentStyle": "space",
15
+ "indentWidth": 2
16
+ },
17
+ "javascript": {
18
+ "formatter": {
19
+ "quoteStyle": "single",
20
+ "semicolons": "always"
21
+ }
22
+ }
23
+ }
package/package.json ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ "name": "@sprig-and-prose/prose-parser",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "Generic prose scanner and parser core for sprig",
6
+ "main": "src/index.js",
7
+ "scripts": {
8
+ "format": "biome format . --write",
9
+ "lint": "biome lint .",
10
+ "typecheck": "tsc -p tsconfig.json",
11
+ "test": "node --test"
12
+ },
13
+ "keywords": [],
14
+ "author": "",
15
+ "license": "ISC",
16
+ "dependencies": {},
17
+ "devDependencies": {
18
+ "@biomejs/biome": "^1.9.4",
19
+ "typescript": "^5.7.2"
20
+ }
21
+ }
package/src/index.js ADDED
@@ -0,0 +1,8 @@
/**
 * @fileoverview Public API for prose-parser
 */

// Tokenizer: turns raw text into Token[] with source spans.
export { scan } from './scanner.js';
// Parser base class: token navigation, diagnostics, raw brace-block parsing.
export { ParserCore } from './parser-core.js';
// Span helpers for building/merging source locations.
export { createSpan, createPointSpan, mergeSpans } from './util/span.js';
// Prose text normalization (dedent of indented blocks).
export { normalizeProseBlock, dedentPreserve } from './util/text.js';
@@ -0,0 +1,304 @@
/**
 * @fileoverview Generic parser core with token navigation and raw content block parsing
 */

/**
 * @typedef {import('./scanner.js').Token} Token
 */

/**
 * @typedef {Object} SourceSpan
 * @property {string} file - File path
 * @property {{ line: number, col: number, offset: number }} start - Start position
 * @property {{ line: number, col: number, offset: number }} end - End position
 */

/**
 * @typedef {Object} Diagnostic
 * @property {'error' | 'warning'} severity - Diagnostic severity
 * @property {string} message - Diagnostic message
 * @property {SourceSpan} [source] - Optional source span
 */

/**
 * Core parser class with low-level token navigation utilities.
 *
 * Errors are collected in `this.diagnostics` instead of being thrown, so
 * subclasses can keep parsing after a failure (tolerant parsing).
 */
export class ParserCore {
  /**
   * @param {Token[]} tokens - Token stream (normally ends with an EOF token)
   * @param {string} filePath - Path used when building spans
   * @param {string} sourceText - Original source text (for offset-based slicing by callers)
   */
  constructor(tokens, filePath, sourceText) {
    this.tokens = tokens;
    this.filePath = filePath;
    this.sourceText = sourceText;
    this.index = 0;
    /** @type {Diagnostic[]} */
    this.diagnostics = [];
  }

  /**
   * Returns the current token without consuming it.
   * @returns {Token | null}
   */
  peek() {
    if (this.index >= this.tokens.length) {
      return null;
    }
    return this.tokens[this.index];
  }

  /**
   * Returns the most recently consumed token.
   * @returns {Token | null}
   */
  previous() {
    if (this.index === 0) {
      return null;
    }
    return this.tokens[this.index - 1];
  }

  /**
   * Consumes and returns the current token (null when exhausted).
   * @returns {Token | null}
   */
  advance() {
    if (this.index >= this.tokens.length) {
      return null;
    }
    return this.tokens[this.index++];
  }

  /**
   * True when no meaningful tokens remain (the EOF token counts as the end).
   * @returns {boolean}
   */
  isAtEnd() {
    const token = this.peek();
    return token === null || token.type === 'EOF';
  }

  /**
   * Tests the current token against a type (and optionally an exact value)
   * without consuming it.
   * @param {string} type
   * @param {string} [value]
   * @returns {boolean}
   */
  match(type, value) {
    const token = this.peek();
    if (!token || token.type !== type) {
      return false;
    }
    if (value !== undefined && token.value !== value) {
      return false;
    }
    return true;
  }

  /**
   * Consumes the current token if it matches type (and optional value);
   * otherwise records an error diagnostic and leaves the position unchanged.
   * @param {string} type
   * @param {string} [value]
   * @returns {{ token: Token | null, diagnostic: Diagnostic | null }}
   */
  expect(type, value) {
    const token = this.peek();
    if (!token || token.type !== type) {
      const diagnostic = {
        severity: 'error',
        message: `Expected ${type}${value !== undefined ? ` with value "${value}"` : ''}, got ${token ? token.type : 'EOF'}`,
        source: token ? token.span : undefined,
      };
      this.diagnostics.push(diagnostic);
      return { token: null, diagnostic };
    }
    if (value !== undefined && token.value !== value) {
      const diagnostic = {
        severity: 'error',
        message: `Expected ${type} with value "${value}", got "${token.value}"`,
        source: token.span,
      };
      this.diagnostics.push(diagnostic);
      return { token: null, diagnostic };
    }
    // Success: advance and return token
    this.advance();
    return { token, diagnostic: null };
  }

  /**
   * Like expect(), but accepts either KEYWORD or IDENTIFIER. Useful because
   * the scanner only promotes identifiers to KEYWORD when a keyword set is
   * supplied.
   * @param {string} [value]
   * @returns {{ token: Token | null, diagnostic: Diagnostic | null }}
   */
  expectIdentifierOrKeyword(value) {
    const token = this.peek();
    if (!token || (token.type !== 'KEYWORD' && token.type !== 'IDENTIFIER')) {
      const diagnostic = {
        severity: 'error',
        message: `Expected identifier or keyword${value !== undefined ? ` "${value}"` : ''}, got ${token ? token.type : 'EOF'}`,
        source: token ? token.span : undefined,
      };
      this.diagnostics.push(diagnostic);
      return { token: null, diagnostic };
    }
    if (value !== undefined && token.value !== value) {
      const diagnostic = {
        severity: 'error',
        message: `Expected identifier or keyword "${value}", got "${token.value}"`,
        source: token.span,
      };
      this.diagnostics.push(diagnostic);
      return { token: null, diagnostic };
    }
    // Success: advance and return token
    this.advance();
    return { token, diagnostic: null };
  }

  /**
   * Convenience alias: expects a specific "kind" word (identifier or keyword).
   * @param {string} value
   * @returns {{ token: Token | null, diagnostic: Diagnostic | null }}
   */
  expectKindToken(value) {
    return this.expectIdentifierOrKeyword(value);
  }

  /**
   * Reads an identifier name (IDENTIFIER or KEYWORD) without reporting a
   * diagnostic on failure.
   * @returns {string | null}
   */
  readIdent() {
    const token = this.peek();
    if (!token || (token.type !== 'IDENTIFIER' && token.type !== 'KEYWORD')) {
      return null;
    }
    this.advance();
    return token.value;
  }

  /**
   * Consumes an identifier, reporting a diagnostic if missing.
   * @returns {string | null}
   */
  consumeIdentifier() {
    // Note: the diagnostic (if any) is already recorded in this.diagnostics
    // by expectIdentifierOrKeyword, so only the token is needed here.
    const { token } = this.expectIdentifierOrKeyword();
    return token ? token.value : null;
  }

  /**
   * Parses an identifier path (IDENTIFIER (DOT IDENTIFIER)*), e.g. "a.b.c".
   * Stops (without error) at the first non-identifier after a DOT.
   * @returns {string | null} Dot-joined path, or null if no identifier starts here
   */
  parseIdentifierPath() {
    if (!this.match('IDENTIFIER') && !this.match('KEYWORD')) {
      return null;
    }

    const parts = [];
    const firstToken = this.advance();
    if (firstToken) {
      parts.push(firstToken.value);
    }

    while (this.match('DOT')) {
      this.advance(); // consume DOT
      if (this.match('IDENTIFIER') || this.match('KEYWORD')) {
        const partToken = this.advance();
        if (partToken) {
          parts.push(partToken.value);
        }
      } else {
        break;
      }
    }

    return parts.join('.');
  }

  /**
   * Reports a diagnostic.
   * @param {'error' | 'warning'} severity
   * @param {string} message
   * @param {SourceSpan} [span]
   */
  reportDiagnostic(severity, message, span) {
    this.diagnostics.push({
      severity,
      message,
      source: span,
    });
  }

  /**
   * Creates a span from start and end tokens.
   * @param {Token} startToken
   * @param {Token} endToken
   * @returns {SourceSpan}
   */
  createSpan(startToken, endToken) {
    return {
      file: this.filePath,
      start: startToken.span.start,
      end: endToken.span.end,
    };
  }

  /**
   * Creates a span from a single token.
   * @param {Token} token
   * @returns {SourceSpan}
   */
  spanFromToken(token) {
    return token.span;
  }

  /**
   * Parses a raw content block by brace matching (returns spans only, no raw string).
   * Expects the cursor to sit on the LBRACE following the keyword; consumes
   * through the matching RBRACE. Nested braces are tracked by depth.
   * @param {string} kind - Block kind ('describe', 'title', 'note')
   * @param {Token} keywordToken - The keyword token
   * @returns {{ kind: string, contentSpan: { startOffset: number, endOffset: number }, span: SourceSpan } | null}
   */
  parseRawContentBlock(kind, keywordToken) {
    // Missing LBRACE is already reported by expect(); just bail out.
    const { token: lbrace } = this.expect('LBRACE');
    if (!lbrace) {
      return null;
    }

    // Find matching closing brace by tracking depth
    let depth = 1;
    const startOffset = lbrace.span.end.offset;
    let endOffset = startOffset;
    let endToken = null;

    while (depth > 0 && this.index < this.tokens.length) {
      const token = this.tokens[this.index];
      if (token.type === 'EOF') break;

      if (token.type === 'LBRACE') {
        depth++;
        this.index++;
      } else if (token.type === 'RBRACE') {
        depth--;
        if (depth === 0) {
          endToken = token;
          endOffset = token.span.start.offset;
          this.index++;
          break;
        } else {
          this.index++;
        }
      } else {
        this.index++;
      }
    }

    if (depth > 0) {
      this.reportDiagnostic('error', `Unclosed ${kind} block`, keywordToken.span);
      return null;
    }

    return {
      kind,
      contentSpan: {
        startOffset,
        endOffset,
      },
      span: this.createSpan(keywordToken, endToken || lbrace),
    };
  }
}
package/src/scanner.js ADDED
@@ -0,0 +1,309 @@
/**
 * @fileoverview Generic tokenizer/scanner for prose-like syntax
 */

/**
 * @typedef {Object} Token
 * @property {string} type - Token type
 * @property {string} value - Token value
 * @property {SourceSpan} span - Source span
 */

/**
 * @typedef {Object} SourceSpan
 * @property {string} file - File path
 * @property {{ line: number, col: number, offset: number }} start - Start position
 * @property {{ line: number, col: number, offset: number }} end - End position
 */

/**
 * Single-character punctuation tokens, keyed by character.
 * Replaces six formerly duplicated emit branches with one table lookup.
 */
const PUNCT_TYPES = {
  '{': 'LBRACE',
  '}': 'RBRACE',
  '[': 'LBRACKET',
  ']': 'RBRACKET',
  '.': 'DOT',
  ',': 'COMMA',
};

/**
 * Scans input text and returns tokens with source spans.
 * If options.keywords (Set<string>) is provided, identifiers matching a keyword get type 'KEYWORD'; otherwise they stay 'IDENTIFIER'.
 * Unknown characters are skipped (tolerant parsing); a trailing EOF token is always appended.
 *
 * @param {string} text - Input text
 * @param {string} file - File path
 * @param {{ keywords?: Set<string> }} [options] - Optional: keywords set for KEYWORD vs IDENTIFIER
 * @returns {Token[]}
 * @throws {Error} On an unterminated block comment or a stray `*}`.
 */
export function scan(text, file, options) {
  const keywords = options?.keywords;
  const tokens = [];
  let offset = 0;
  let line = 1;
  let col = 1;

  while (offset < text.length) {
    const startOffset = offset;
    const startLine = line;
    const startCol = col;

    const ch = text[offset];

    // Skip whitespace (but track newlines for span calculations)
    if (/\s/.test(ch)) {
      if (ch === '\n') {
        line++;
        col = 1;
      } else {
        col++;
      }
      offset++;
      continue;
    }

    // Block comment {* ... *} (no nesting; {* inside comment is plain text)
    if (ch === '{' && offset + 1 < text.length && text[offset + 1] === '*') {
      offset += 2;
      col += 2;
      let foundEnd = false;
      while (offset < text.length) {
        if (text[offset] === '*' && offset + 1 < text.length && text[offset + 1] === '}') {
          offset += 2;
          col += 2;
          foundEnd = true;
          break;
        }
        if (text[offset] === '\n') {
          line++;
          col = 1;
        } else {
          col++;
        }
        offset++;
      }
      if (!foundEnd) {
        throw new Error(
          `Unterminated block comment: expected \`*}\` before end of file at ${file}:${startLine}:${startCol}`,
        );
      }
      continue;
    }

    // Stray *} (no matching {*)
    if (ch === '*' && offset + 1 < text.length && text[offset + 1] === '}') {
      throw new Error(`Stray \`*}\`: no matching \`{*\` at ${file}:${line}:${col}`);
    }

    // Single-quoted strings.
    // Only treat as a string delimiter if it's clearly a string (not a
    // contraction): a quote between two letters/digits is a contraction
    // and falls through to the branches below (identifier scanning already
    // absorbs apostrophes inside words).
    if (ch === "'") {
      const prevCh = startOffset > 0 ? text[startOffset - 1] : null;
      const nextCh = offset + 1 < text.length ? text[offset + 1] : null;
      const isContraction =
        prevCh !== null &&
        /[A-Za-z0-9]/.test(prevCh) &&
        nextCh !== null &&
        /[A-Za-z0-9]/.test(nextCh);

      if (!isContraction) {
        // This looks like a string delimiter
        offset++;
        col++;
        let value = '';
        let escaped = false;

        while (offset < text.length) {
          const c = text[offset];
          if (escaped) {
            // \' and \\ unescape; any other escape keeps its backslash verbatim.
            if (c === "'" || c === '\\') {
              value += c;
            } else {
              value += '\\' + c;
            }
            escaped = false;
          } else if (c === '\\') {
            escaped = true;
          } else if (c === "'") {
            // Closing quote: consume and stop.
            offset++;
            col++;
            break;
          } else {
            value += c;
          }
          // Fix: keep line/col in sync when a string spans multiple lines;
          // previously every character (including '\n') only bumped col,
          // corrupting the spans of all subsequent tokens.
          if (c === '\n') {
            line++;
            col = 1;
          } else {
            col++;
          }
          offset++;
        }

        // Note: an unterminated string still produces a token (tolerant parsing).
        tokens.push({
          type: 'STRING',
          value,
          span: {
            file,
            start: { line: startLine, col: startCol, offset: startOffset },
            end: { line, col, offset },
          },
        });
        continue;
      }
    }

    // Single-character punctuation ({ } [ ] . ,) via the lookup table
    const punctType = PUNCT_TYPES[ch];
    if (punctType !== undefined) {
      tokens.push({
        type: punctType,
        value: ch,
        span: {
          file,
          start: { line: startLine, col: startCol, offset: startOffset },
          end: { line, col: col + 1, offset: offset + 1 },
        },
      });
      offset++;
      col++;
      continue;
    }

    // Numbers (integer literals, optional leading minus)
    if (/[0-9]/.test(ch) || (ch === '-' && /[0-9]/.test(text[offset + 1] || ''))) {
      let value = '';
      if (ch === '-') {
        value += ch;
        offset++;
        col++;
      }
      while (offset < text.length && /[0-9]/.test(text[offset])) {
        value += text[offset];
        offset++;
        col++;
      }
      tokens.push({
        type: 'NUMBER',
        value,
        span: {
          file,
          start: { line: startLine, col: startCol, offset: startOffset },
          end: { line, col, offset },
        },
      });
      continue;
    }

    // Identifiers and keywords (including contractions with apostrophes:
    // an apostrophe is absorbed only when followed by a letter/digit)
    if (/[A-Za-z_]/.test(ch)) {
      let value = '';
      while (
        offset < text.length &&
        (/[A-Za-z0-9_]/.test(text[offset]) ||
          (text[offset] === "'" &&
            offset + 1 < text.length &&
            /[A-Za-z0-9]/.test(text[offset + 1])))
      ) {
        value += text[offset];
        offset++;
        col++;
      }

      const type = keywords && keywords.has(value) ? 'KEYWORD' : 'IDENTIFIER';

      tokens.push({
        type,
        value,
        span: {
          file,
          start: { line: startLine, col: startCol, offset: startOffset },
          end: { line, col, offset },
        },
      });
      continue;
    }

    // Unknown character - skip (tolerant parsing)
    offset++;
    col++;
  }

  // EOF token
  tokens.push({
    type: 'EOF',
    value: '',
    span: {
      file,
      start: { line, col, offset },
      end: { line, col, offset },
    },
  });

  return tokens;
}
@@ -0,0 +1,45 @@
/**
 * @fileoverview Source span utilities for tracking source locations
 */

/**
 * @typedef {Object} SourceSpan
 * @property {string} file - File path
 * @property {{ line: number, col: number, offset: number }} start - Start position
 * @property {{ line: number, col: number, offset: number }} end - End position
 */

/**
 * Creates a source span from start and end positions.
 * @param {string} file - File path
 * @param {{ line: number, col: number, offset: number }} start - Start position
 * @param {{ line: number, col: number, offset: number }} end - End position
 * @returns {SourceSpan}
 */
export const createSpan = (file, start, end) => ({ file, start, end });

/**
 * Creates a zero-length span at a position (start and end coincide).
 * @param {string} file - File path
 * @param {{ line: number, col: number, offset: number }} pos - Position
 * @returns {SourceSpan}
 */
export const createPointSpan = (file, pos) => createSpan(file, pos, pos);

/**
 * Merges two spans: the result runs from the start of the first to the end
 * of the second, keeping the first span's file.
 * @param {SourceSpan} span1 - First span
 * @param {SourceSpan} span2 - Second span
 * @returns {SourceSpan}
 */
export const mergeSpans = (span1, span2) => ({
  file: span1.file,
  start: span1.start,
  end: span2.end,
});
@@ -0,0 +1,125 @@
/**
 * @fileoverview Text normalization utilities
 */

/**
 * Counts leading whitespace, treating tabs as 2 spaces.
 * @param {string} line - Line to count indentation for
 * @returns {number} - Indentation count (spaces + tabs*2)
 */
function countIndent(line) {
  let width = 0;
  for (const ch of line) {
    if (ch === ' ') {
      width += 1;
    } else if (ch === '\t') {
      width += 2; // a tab counts as two spaces
    } else {
      break;
    }
  }
  return width;
}

/**
 * Removes exactly `amount` worth of leading whitespace (spaces/tabs).
 * Treats tabs as 2 spaces when removing; a tab that overshoots the budget
 * is still dropped in full.
 * @param {string} line - Line to remove indentation from
 * @param {number} amount - Amount of indentation to remove (in space units)
 * @returns {string} - Line with indentation removed
 */
function removeIndent(line, amount) {
  let taken = 0;
  let cut = 0;
  while (cut < line.length && taken < amount) {
    const ch = line[cut];
    if (ch === ' ') {
      taken += 1;
    } else if (ch === '\t') {
      taken += 2;
    } else {
      break;
    }
    cut += 1;
  }
  return line.slice(cut);
}

/**
 * Normalizes prose blocks by removing common indentation from lines after the
 * first non-empty line. This handles the common case where the first line is
 * flush-left (indent 0) and subsequent lines are indented due to code
 * formatting.
 *
 * Behavior: lines up to and including the first non-empty line are kept
 * verbatim; later whitespace-only lines become empty strings; every other
 * later line loses the minimum indentation found among those later lines.
 * Input without any non-empty line, or without any indented continuation,
 * is returned unchanged.
 *
 * @param {string} raw - Raw text to normalize
 * @returns {string} - Normalized text
 */
export function normalizeProseBlock(raw) {
  const lines = raw.split('\n');

  // Locate the first line that carries content.
  const firstContent = lines.findIndex((l) => l.trim().length > 0);
  if (firstContent === -1) {
    return raw;
  }

  // Minimum indent among non-empty continuation lines — the common prefix
  // to strip while preserving relative indentation.
  let common = null;
  for (const l of lines.slice(firstContent + 1)) {
    if (l.trim().length === 0) {
      continue;
    }
    const width = countIndent(l);
    common = common === null ? width : Math.min(common, width);
  }
  if (common === null || common === 0) {
    return raw;
  }

  return lines
    .map((l, i) => {
      if (i <= firstContent) {
        return l; // head lines stay verbatim
      }
      if (l.trim().length === 0) {
        return ''; // blank continuation lines collapse to empty
      }
      return removeIndent(l, common);
    })
    .join('\n');
}

/**
 * @deprecated Use normalizeProseBlock instead
 * @param {string} raw - Raw text to dedent
 * @returns {string} - Dedented text
 */
export function dedentPreserve(raw) {
  return normalizeProseBlock(raw);
}
@@ -0,0 +1,47 @@
/**
 * @fileoverview Tests for prose-parser ParserCore
 */

import { test } from 'node:test';
import { scan, ParserCore } from '../src/index.js';

/** Minimal assertion helper: throws on a falsy condition. */
function assert(condition, message) {
  if (!condition) {
    throw new Error(message);
  }
}

/** Builds a parser (and its token stream) over the given source text. */
function makeParser(source) {
  const tokens = scan(source, 'test.prose');
  return { tokens, parser: new ParserCore(tokens, 'test.prose', source) };
}

test('ParserCore parseIdentifierPath parses single identifier', () => {
  const { parser } = makeParser('foo');
  const path = parser.parseIdentifierPath();
  assert(path === 'foo', 'path should be foo');
});

test('ParserCore parseIdentifierPath parses dotted path', () => {
  const { parser } = makeParser('a.b.c');
  const path = parser.parseIdentifierPath();
  assert(path === 'a.b.c', 'path should be a.b.c');
});

test('ParserCore parseRawContentBlock parses simple brace block', () => {
  const { tokens, parser } = makeParser("describe { inner }");
  const keywordToken = tokens[0];
  assert(
    keywordToken.type === 'IDENTIFIER' && keywordToken.value === 'describe',
    'first token should be describe',
  );
  parser.advance(); // consume describe
  const block = parser.parseRawContentBlock('describe', keywordToken);
  assert(block !== null, 'block should be parsed');
  assert(block.kind === 'describe', 'kind should be describe');
  assert(block.contentSpan.startOffset < block.contentSpan.endOffset, 'contentSpan should have range');
});

test('ParserCore expect reports diagnostic on mismatch', () => {
  const { parser } = makeParser('foo');
  const { token, diagnostic } = parser.expect('LBRACE');
  assert(token === null, 'token should be null');
  assert(diagnostic !== null, 'diagnostic should be set');
  assert(parser.diagnostics.length === 1, 'diagnostics should have one entry');
});
@@ -0,0 +1,123 @@
/**
 * @fileoverview Tests for prose-parser scanner
 */

import { test } from 'node:test';
import { deepStrictEqual, throws } from 'node:assert/strict';
import { scan } from '../src/scanner.js';

/** Minimal assertion helper: throws on a falsy condition. */
function assert(condition, message) {
  if (!condition) {
    throw new Error(message);
  }
}

/** Token types of a scan result, EOF excluded. */
const nonEofTypes = (tokens) => tokens.filter((t) => t.type !== 'EOF').map((t) => t.type);

test('scan returns EOF for empty input', () => {
  const tokens = scan('', 'test.prose');
  assert(tokens.length === 1, 'should have one token');
  assert(tokens[0].type === 'EOF', 'should be EOF');
});

test('scan tokenizes braces and comma without keywords', () => {
  const tokens = scan(' { } , ', 'test.prose');
  const types = tokens.map((t) => t.type);
  assert(types.includes('LBRACE'), 'should have LBRACE');
  assert(types.includes('RBRACE'), 'should have RBRACE');
  assert(types.includes('COMMA'), 'should have COMMA');
  assert(types.at(-1) === 'EOF', 'should end with EOF');
});

test('scan treats identifiers as IDENTIFIER when no keywords option', () => {
  const tokens = scan('universe book', 'test.prose');
  const idents = tokens.filter((t) => t.type === 'IDENTIFIER' || t.type === 'KEYWORD');
  assert(idents.length === 2, 'should have two identifier-like tokens');
  assert(idents[0].value === 'universe', 'first should be universe');
  assert(idents[1].value === 'book', 'second should be book');
  assert(!tokens.some((t) => t.type === 'KEYWORD'), 'without keywords option, none should be KEYWORD');
});

test('scan treats keyword as KEYWORD when keywords option provided', () => {
  const tokens = scan('universe book foo', 'test.prose', {
    keywords: new Set(['universe', 'book']),
  });
  const byValue = (v) => tokens.find((t) => t.value === v);
  const universeTok = byValue('universe');
  const bookTok = byValue('book');
  const fooTok = byValue('foo');
  assert(universeTok && universeTok.type === 'KEYWORD', 'universe should be KEYWORD');
  assert(bookTok && bookTok.type === 'KEYWORD', 'book should be KEYWORD');
  assert(fooTok && fooTok.type === 'IDENTIFIER', 'foo should be IDENTIFIER');
});

test('scan tokenizes single-quoted string', () => {
  const str = scan("'hello'", 'test.prose').find((t) => t.type === 'STRING');
  assert(str !== undefined, 'should have STRING token');
  assert(str.value === 'hello', 'string value should be hello');
});

test('scan tokenizes dot for paths', () => {
  const types = scan('a.b', 'test.prose').map((t) => t.type);
  assert(types.includes('IDENTIFIER') || types.includes('KEYWORD'), 'should have identifier');
  assert(types.includes('DOT'), 'should have DOT');
});

test('scan skips block comment: comment-only produces EOF only', () => {
  const tokens = scan('{* hello *}', 'test.prose');
  assert(tokens.length === 1, 'should have one token (EOF)');
  assert(tokens[0].type === 'EOF', 'should be EOF');
});

test('scan skips block comment: comments between tokens', () => {
  const withComment = scan('scene X { {* c *} actors { } }', 'test.prose');
  const withoutComment = scan('scene X { actors { } }', 'test.prose');
  deepStrictEqual(nonEofTypes(withComment), nonEofTypes(withoutComment), 'token types should match');
});

test('scan skips block comment: comment after tokens', () => {
  const tokens = scan('kind { [ PlayerSkill ] } {* array of PlayerSkill *}', 'test.prose');
  const nonEof = tokens.filter((t) => t.type !== 'EOF');
  assert(nonEof.some((t) => t.value === 'kind'), 'should have kind');
  assert(nonEof.some((t) => t.value === 'PlayerSkill'), 'should have PlayerSkill');
  assert(tokens.at(-1).type === 'EOF', 'should end with EOF');
});

test('scan skips block comment: comments inside nested blocks', () => {
  const withComment = scan(
    'many { from { Skills[] } {* source *} by { id } matches { row.skillId } }',
    'test.prose',
  );
  const withoutComment = scan(
    'many { from { Skills[] } by { id } matches { row.skillId } }',
    'test.prose',
  );
  deepStrictEqual(nonEofTypes(withComment), nonEofTypes(withoutComment), 'token types should match');
});

test('scan throws on unterminated block comment', () => {
  throws(
    () => scan('{* never ends', 'test.prose'),
    (err) => {
      assert(err instanceof Error);
      assert(err.message.includes('Unterminated block comment'));
      assert(err.message.includes('test.prose'));
      return true;
    },
  );
});

test('scan throws on stray *}', () => {
  throws(
    () => scan('foo *} bar', 'test.prose'),
    (err) => {
      assert(err instanceof Error);
      assert(err.message.includes('Stray'));
      assert(err.message.includes('test.prose'));
      return true;
    },
  );
});
@@ -0,0 +1,29 @@
/**
 * @fileoverview Tests for prose-parser text utils
 */

import { test } from 'node:test';
import { normalizeProseBlock } from '../src/index.js';

/** Minimal assertion helper: throws on a falsy condition. */
function assert(condition, message) {
  if (!condition) {
    throw new Error(message);
  }
}

test('normalizeProseBlock returns empty string unchanged', () => {
  assert(normalizeProseBlock('') === '', 'empty string');
});

test('normalizeProseBlock removes common indent', () => {
  const out = normalizeProseBlock('line0\n  line1\n  line2');
  assert(out === 'line0\nline1\nline2', 'should dedent second and third lines');
});

test('normalizeProseBlock preserves first line and empty lines', () => {
  const lines = normalizeProseBlock('first\n\n  indented').split('\n');
  assert(lines[0] === 'first', 'first line unchanged');
  assert(lines[1] === '', 'empty line preserved');
});
package/tsconfig.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "compilerOptions": {
3
+ "checkJs": true,
4
+ "allowJs": true,
5
+ "noEmit": true,
6
+ "strict": true,
7
+ "target": "ES2022",
8
+ "module": "ES2022",
9
+ "moduleResolution": "node",
10
+ "esModuleInterop": true,
11
+ "skipLibCheck": true
12
+ },
13
+ "include": ["src/**/*", "test/**/*"]
14
+ }