@sprig-and-prose/sprig-universe 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,240 @@
1
+ /**
2
+ * @fileoverview Tokenizer/scanner for Sprig universe syntax
3
+ */
4
+
5
+ /**
6
+ * @typedef {Object} Token
7
+ * @property {string} type - Token type
8
+ * @property {string} value - Token value
9
+ * @property {SourceSpan} span - Source span
10
+ */
11
+
12
+ /**
13
+ * @typedef {Object} SourceSpan
14
+ * @property {string} file - File path
15
+ * @property {{ line: number, col: number, offset: number }} start - Start position
16
+ * @property {{ line: number, col: number, offset: number }} end - End position
17
+ */
18
+
19
/**
 * Reserved words of the Sprig universe syntax.
 *
 * An identifier-shaped lexeme whose text is exactly one of these words is
 * tagged `KEYWORD` by the scanner instead of `IDENTIFIER`. Matching is
 * case-sensitive because membership is an exact `Set` lookup.
 */
const KEYWORDS = new Set(
  `universe anthology series book chapter concept
   in relates relationship relationships label and describe from
   aliases alias note title reference references paths url`.split(/\s+/),
);
43
+
44
/**
 * Scans input text and returns tokens with source spans.
 *
 * Token types produced: `STRING`, `LBRACE`, `RBRACE`, `DOT`, `COMMA`,
 * `KEYWORD`, `IDENTIFIER`, and a final `EOF`. Whitespace separates tokens and
 * is never emitted. Any character that starts no token is skipped silently
 * (tolerant scanning), so this function does not throw on malformed input.
 *
 * Positions are 1-based for `line`/`col` and 0-based for `offset`; a span's
 * `end` points just past the last consumed character. A newline resets `col`
 * to 1 and increments `line` wherever it is consumed, including inside a
 * string literal, so spans after a multi-line string stay accurate.
 *
 * String literals are single-quoted. `\'` and `\\` unescape to `'` and `\`;
 * any other escaped character keeps its backslash in the token value. An
 * unterminated string runs to end of input and is still emitted, and a lone
 * backslash as the very last character of the input is dropped.
 *
 * @param {string} text - Input text
 * @param {string} file - File path, copied into every token's span
 * @returns {Token[]} Tokens in source order, always terminated by an EOF token
 */
export function scan(text, file) {
  // Built once per call rather than once per character. None of these use the
  // `g`/`y` flags, so `.test()` carries no state between calls.
  const WHITESPACE = /\s/;
  const ALPHANUMERIC = /[A-Za-z0-9]/;
  const IDENTIFIER_START = /[A-Za-z_]/;
  const IDENTIFIER_PART = /[A-Za-z0-9_]/;
  const PUNCTUATION = new Map([
    ['{', 'LBRACE'],
    ['}', 'RBRACE'],
    ['.', 'DOT'], // used for namespace paths
    [',', 'COMMA'],
  ]);

  const tokens = [];
  let offset = 0;
  let line = 1;
  let col = 1;

  /** Snapshot of the current position as a fresh object. */
  const position = () => ({ line, col, offset });

  /**
   * Consumes the character at `offset`, keeping `line`/`col` in step. Every
   * consumer goes through here so newlines are counted no matter which branch
   * swallows them.
   */
  const advance = () => {
    if (text[offset] === '\n') {
      line++;
      col = 1;
    } else {
      col++;
    }
    offset++;
  };

  /**
   * True when the character at `index` may continue an identifier: a word
   * character, or an apostrophe immediately followed by a letter/digit
   * (contractions such as `don't`).
   */
  const continuesIdentifier = (index) =>
    IDENTIFIER_PART.test(text[index]) ||
    (text[index] === "'" &&
      index + 1 < text.length &&
      ALPHANUMERIC.test(text[index + 1]));

  while (offset < text.length) {
    const ch = text[offset];

    // Whitespace is skipped, but still advances the position counters.
    if (WHITESPACE.test(ch)) {
      advance();
      continue;
    }

    const start = position();

    // Single-quoted strings. A quote wedged directly between two
    // letters/digits is treated as a contraction apostrophe, not a delimiter.
    if (ch === "'") {
      const prevCh = offset > 0 ? text[offset - 1] : null;
      const nextCh = offset + 1 < text.length ? text[offset + 1] : null;
      const isContraction =
        prevCh !== null &&
        ALPHANUMERIC.test(prevCh) &&
        nextCh !== null &&
        ALPHANUMERIC.test(nextCh);

      if (!isContraction) {
        advance(); // opening quote

        let value = '';
        let escaped = false;
        while (offset < text.length) {
          const c = text[offset];
          advance();

          if (escaped) {
            // Only \' and \\ are real escapes; anything else keeps its backslash.
            value += c === "'" || c === '\\' ? c : `\\${c}`;
            escaped = false;
          } else if (c === '\\') {
            escaped = true;
          } else if (c === "'") {
            break; // closing quote
          } else {
            value += c;
          }
        }

        tokens.push({
          type: 'STRING',
          value,
          span: { file, start, end: position() },
        });
        continue;
      }
      // Contraction apostrophe: no branch below accepts a quote as a token
      // start, so it ends up skipped by the unknown-character fallback.
    }

    // Single-character punctuation: braces, dot, comma.
    const punctuationType = PUNCTUATION.get(ch);
    if (punctuationType !== undefined) {
      advance();
      tokens.push({
        type: punctuationType,
        value: ch,
        span: { file, start, end: position() },
      });
      continue;
    }

    // Identifiers and keywords (including contractions with apostrophes).
    if (IDENTIFIER_START.test(ch)) {
      while (offset < text.length && continuesIdentifier(offset)) {
        advance();
      }

      const value = text.slice(start.offset, offset);
      tokens.push({
        type: KEYWORDS.has(value) ? 'KEYWORD' : 'IDENTIFIER',
        value,
        span: { file, start, end: position() },
      });
      continue;
    }

    // Unknown character (digits at token start, stray symbols, ...): skipped
    // without emitting anything, keeping the scanner tolerant.
    advance();
  }

  // EOF token: a zero-width span at the final position.
  tokens.push({
    type: 'EOF',
    value: '',
    span: { file, start: position(), end: position() },
  });

  return tokens;
}