@iyulab/m3l 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/lexer.js ADDED
@@ -0,0 +1,421 @@
1
+ // --- Regex patterns ---
2
+ const RE_H1 = /^# (.+)$/;
3
+ const RE_H2 = /^## (.+)$/;
4
+ const RE_H3 = /^### (.+)$/;
5
+ const RE_HR = /^-{3,}$/;
6
+ const RE_BLOCKQUOTE = /^> (.+)$/;
7
+ const RE_LIST_ITEM = /^(\s*)- (.+)$/;
8
+ const RE_BLANK = /^\s*$/;
9
+ // H2 sub-patterns
10
+ const RE_TYPE_INDICATOR = /^([\w][\w.]*(?:\([^)]*\))?)\s*::(\w+)(.*)$/;
11
+ const RE_MODEL_DEF = /^([\w][\w.]*(?:\([^)]*\))?)\s*(?::\s*(.+?))?(\s+@.+)?$/;
12
+ // Field line patterns
13
+ const RE_FIELD_NAME = /^([\w]+)(?:\(([^)]*)\))?\s*(?::\s*(.+))?$/;
14
+ const RE_TYPE_PART = /^([\w]+)(?:\(([^)]*)\))?(\?)?(\[\])?/;
15
+ const RE_FRAMEWORK_ATTR = /`\[([^\]]+)\]`/g;
16
+ const RE_INLINE_COMMENT = /\s+#\s+(.+)$/;
17
+ // Known kind-context H1 headers
18
+ const KIND_SECTIONS = new Set(['Lookup', 'Rollup', 'Computed', 'Computed from Rollup']);
19
/** Top-level `@import "path"` / `@import 'path'` directive; group 1 is the path. */
const RE_IMPORT_LINE = /^@import\s+["'](.+?)["']\s*$/;
/**
 * Tokenize M3L markdown content into a sequence of tokens.
 *
 * The content is split on LF / CRLF line breaks and every line produces
 * exactly one token, so the token at index `i` always has `line === i + 1`.
 *
 * @param content - M3L source text.
 * @param file - Not used by this function.
 * @returns One token per input line, in source order.
 */
export function lex(content, file) {
    return content
        .split(/\r?\n/)
        .map((text, index) => lexLine(text, index + 1));
}
/**
 * Classify a single source line and build its token.
 *
 * Checks run in a fixed order: blank, horizontal rule, `###`, `##`, `#`,
 * blockquote, list item, `@import`, and finally plain text.
 *
 * @param raw - The line exactly as it appears in the source.
 * @param line - 1-based line number.
 */
function lexLine(raw, line) {
    if (RE_BLANK.test(raw)) {
        return { type: 'blank', raw, line, indent: 0 };
    }
    // The rule pattern is checked against the trimmed line, unlike the heading patterns.
    if (RE_HR.test(raw.trim())) {
        return { type: 'horizontal_rule', raw, line, indent: 0 };
    }
    // H3: section header
    const sectionHit = RE_H3.exec(raw);
    if (sectionHit) {
        return {
            type: 'section',
            raw,
            line,
            indent: 0,
            data: { name: sectionHit[1].trim() },
        };
    }
    // H2: model / enum / interface / view; the token type is decided by tokenizeH2
    const h2Hit = RE_H2.exec(raw);
    if (h2Hit) {
        return tokenizeH2(h2Hit[1].trim(), raw, line);
    }
    // H1: either a kind section (# Lookup, # Rollup, ...) or a namespace / title
    const h1Hit = RE_H1.exec(raw);
    if (h1Hit) {
        const title = h1Hit[1].trim();
        if (KIND_SECTIONS.has(title)) {
            return {
                type: 'section',
                raw,
                line,
                indent: 0,
                data: { name: title, kind_section: true },
            };
        }
        return {
            type: 'namespace',
            raw,
            line,
            indent: 0,
            data: parseNamespace(title),
        };
    }
    const quoteHit = RE_BLOCKQUOTE.exec(raw);
    if (quoteHit) {
        return {
            type: 'blockquote',
            raw,
            line,
            indent: 0,
            data: { text: quoteHit[1].trim() },
        };
    }
    // List items: two or more leading whitespace characters mark a nested item,
    // anything less is a top-level field.
    const itemHit = RE_LIST_ITEM.exec(raw);
    if (itemHit) {
        const [, leading, body] = itemHit;
        if (leading.length >= 2) {
            return {
                type: 'nested_item',
                raw,
                line,
                indent: leading.length,
                data: parseNestedItem(body),
            };
        }
        return {
            type: 'field',
            raw,
            line,
            indent: 0,
            data: parseFieldLine(body),
        };
    }
    // Everything else is text; an @import directive is text with extra import metadata.
    const trimmed = raw.trim();
    const importHit = RE_IMPORT_LINE.exec(trimmed);
    if (importHit) {
        return {
            type: 'text',
            raw,
            line,
            indent: 0,
            data: { text: trimmed, is_import: true, import_path: importHit[1] },
        };
    }
    return {
        type: 'text',
        raw,
        line,
        indent: 0,
        data: { text: trimmed },
    };
}
144
/**
 * Build the token for the text of a `##` heading.
 *
 * Three shapes are recognised, in this order:
 *  1. `Name(label)? ::kind ...`: the token type is the word after `::`.
 *  2. `Name(label)? : Parent1, Parent2 @attr(...)`: a `model` token.
 *  3. Anything else: a `model` token whose name is the whole heading text.
 *
 * @param content - Trimmed heading text without the leading `## `.
 * @param raw - The original source line.
 * @param line - 1-based line number.
 */
function tokenizeH2(content, raw, line) {
    const typed = RE_TYPE_INDICATOR.exec(content);
    if (typed) {
        const [, namePart, kind, tail] = typed;
        const trailer = tail?.trim() || '';
        const parsedName = parseNameLabel(namePart);
        const data = { name: parsedName.name, label: parsedName.label };
        // Only views carry the materialized flag.
        if (kind === 'view') {
            data.materialized = trailer.includes('@materialized');
        }
        // The first double-quoted run in the trailer is the description.
        const quoted = /"([^"]+)"/.exec(trailer);
        if (quoted) {
            data.description = quoted[1];
        }
        return { type: kind, raw, line, indent: 0, data };
    }
    const model = RE_MODEL_DEF.exec(content);
    if (!model) {
        // Heading text that fits neither pattern is kept verbatim as the model name.
        return {
            type: 'model',
            raw,
            line,
            indent: 0,
            data: { name: content, inherits: [] },
        };
    }
    const parentList = model[2]?.trim();
    const attrText = model[3]?.trim();
    const parsedName = parseNameLabel(model[1]);
    const data = {
        name: parsedName.name,
        label: parsedName.label,
        inherits: parentList
            ? parentList.split(',').map(part => part.trim()).filter(Boolean)
            : [],
    };
    if (attrText) {
        // Model-level attributes: @name or @name(args); args are kept as a single raw string.
        data.attributes = Array.from(attrText.matchAll(/@([\w]+)(?:\(([^)]*)\))?/g), hit => ({ name: hit[1], args: hit[2] ? [hit[2]] : undefined }));
    }
    return { type: 'model', raw, line, indent: 0, data };
}
195
/**
 * Split `Name(label)` into its name and label.
 *
 * @param s - A name, optionally followed by a parenthesized label.
 * @returns `{ name, label }` when a parenthesized label is present, otherwise
 *          `{ name: s }` with no `label` key.
 */
function parseNameLabel(s) {
    const parts = /^([\w][\w.]*)\(([^)]*)\)$/.exec(s);
    return parts ? { name: parts[1], label: parts[2] } : { name: s };
}
202
/**
 * Interpret the text of a `#` heading.
 *
 * `Namespace: domain.example` yields the trimmed namespace name with
 * `is_namespace: true`; any other text is returned unchanged as the name
 * with `is_namespace: false`.
 *
 * @param content - Heading text without the leading `# `.
 */
function parseNamespace(content) {
    const declared = /^Namespace:\s*(.+)$/.exec(content);
    if (declared === null) {
        // Plain heading text, e.g. a document title
        return { name: content, is_namespace: false };
    }
    return { name: declared[1].trim(), is_namespace: true };
}
211
+ function parseFieldLine(content) {
212
+ const data = {};
213
+ // Check for attribute-only line: @meta(...), @index(...), @relation(...)
214
+ if (content.startsWith('@')) {
215
+ data.is_directive = true;
216
+ data.raw_content = content;
217
+ data.attributes = parseAttributesBalanced(content);
218
+ return data;
219
+ }
220
+ // Strip inline comment
221
+ const commentMatch = content.match(RE_INLINE_COMMENT);
222
+ if (commentMatch) {
223
+ data.comment = commentMatch[1];
224
+ content = content.replace(RE_INLINE_COMMENT, '');
225
+ }
226
+ // Extract framework attributes (backtick-wrapped)
227
+ const frameworkAttrs = [];
228
+ let fwMatch;
229
+ const fwRe = /`\[([^\]]+)\]`/g;
230
+ while ((fwMatch = fwRe.exec(content)) !== null) {
231
+ frameworkAttrs.push(`[${fwMatch[1]}]`);
232
+ }
233
+ if (frameworkAttrs.length > 0) {
234
+ data.framework_attrs = frameworkAttrs;
235
+ content = content.replace(/`\[[^\]]+\]`/g, '').trim();
236
+ }
237
+ // Special case: NAME "desc" or NAME(label) "desc" (no colon — enum value with description)
238
+ const enumValueMatch = content.match(/^([\w]+)(?:\(([^)]*)\))?\s+"((?:[^"\\]|\\.)*)"$/);
239
+ if (enumValueMatch) {
240
+ data.name = enumValueMatch[1];
241
+ if (enumValueMatch[2])
242
+ data.label = enumValueMatch[2];
243
+ data.description = enumValueMatch[3];
244
+ return data;
245
+ }
246
+ // Parse name(label): type_and_rest
247
+ const fieldMatch = content.match(RE_FIELD_NAME);
248
+ if (!fieldMatch) {
249
+ data.name = content;
250
+ return data;
251
+ }
252
+ data.name = fieldMatch[1];
253
+ if (fieldMatch[2]) {
254
+ data.label = fieldMatch[2];
255
+ }
256
+ const rest = fieldMatch[3]?.trim();
257
+ if (!rest) {
258
+ // Name only — could be extended format header or enum value
259
+ return data;
260
+ }
261
+ // Preserve full rest for context-dependent parsing (e.g., source directives)
262
+ data.raw_value = rest;
263
+ // Parse: type(params)?[]? = default @attrs "description"
264
+ parseTypeAndAttrs(rest, data);
265
+ return data;
266
+ }
267
/**
 * Parse the part of a field line that follows `name:` and record the
 * findings on `data`.
 *
 * Expected layout: `type(params)?[]? = default @attr(args)... "description"`.
 * Every part is optional. If the whole value is one double-quoted string it
 * is stored as `description` and nothing else is parsed.
 *
 * Fields written (only when the matching part is present): `type_name`,
 * `type_params`, `nullable`, `array`, `default_value`, `attributes`,
 * `description`.
 *
 * @param rest - Trimmed text after the colon.
 * @param data - Field data object, mutated in place.
 */
function parseTypeAndAttrs(rest, data) {
    const end = rest.length;
    let cursor = 0;
    // Only literal spaces are skipped between parts.
    const skipSpaces = () => {
        while (cursor < end && rest[cursor] === ' ') {
            cursor++;
        }
    };
    const nextIs = (ch) => cursor < end && rest[cursor] === ch;
    const stopsDefault = (ch) => ch === ' ' || ch === '@' || ch === '"';

    // Whole value is a single quoted string, e.g. available: "Available"
    if (rest[0] === '"') {
        const closing = findClosingQuote(rest, 0);
        if (closing >= 0 && closing === end - 1) {
            data.description = rest.slice(1, closing);
            return;
        }
    }

    // Type: word(params)?[]?
    const typed = RE_TYPE_PART.exec(rest);
    if (typed) {
        const [whole, typeName, params, optionalMark, arrayMark] = typed;
        data.type_name = typeName;
        if (params) {
            data.type_params = params.split(',').map(p => p.trim());
        }
        data.nullable = optionalMark === '?';
        data.array = arrayMark === '[]';
        cursor = whole.length;
        skipSpaces();
    }

    // Default value: = "quoted" or = unquoted
    if (nextIs('=')) {
        cursor++;
        skipSpaces();
        if (nextIs('"')) {
            const closing = findClosingQuote(rest, cursor);
            if (closing >= 0) {
                // A quoted default keeps its surrounding quotes.
                data.default_value = rest.slice(cursor, closing + 1);
                cursor = closing + 1;
                skipSpaces();
            }
        }
        else {
            // Unquoted default runs up to a space, '@' or '"'; a balanced
            // parenthesized group is consumed whole.
            const from = cursor;
            while (cursor < end && !stopsDefault(rest[cursor])) {
                if (rest[cursor] === '(') {
                    const closeParen = findBalancedParen(rest, cursor);
                    cursor = closeParen >= 0 ? closeParen + 1 : cursor + 1;
                }
                else {
                    cursor++;
                }
            }
            data.default_value = rest.slice(from, cursor);
            skipSpaces();
        }
    }

    // Attributes: @name or @name(balanced_args), back to back
    const collected = [];
    while (nextIs('@')) {
        cursor++;
        const nameFrom = cursor;
        while (cursor < end && /\w/.test(rest[cursor])) {
            cursor++;
        }
        const name = rest.slice(nameFrom, cursor);
        let args;
        if (nextIs('(')) {
            const closeParen = findBalancedParen(rest, cursor);
            if (closeParen >= 0) {
                args = rest.slice(cursor + 1, closeParen);
                cursor = closeParen + 1;
            }
        }
        collected.push({ name, args });
        skipSpaces();
    }
    if (collected.length > 0) {
        data.attributes = collected;
    }

    // Trailing "description"
    skipSpaces();
    if (nextIs('"')) {
        const closing = findClosingQuote(rest, cursor);
        if (closing >= 0) {
            data.description = rest.slice(cursor + 1, closing);
        }
    }
}
351
/**
 * Find the `)` that balances the `(` at `openPos`.
 *
 * Parentheses inside double-quoted strings (escape-aware, via
 * findClosingQuote) and inside single-quoted strings (next `'`, no escape
 * handling) are ignored. An unterminated quote is treated as an ordinary
 * character.
 *
 * @param str - Text to scan.
 * @param openPos - Index at which scanning starts.
 * @returns Index of the balancing `)`, or -1 if there is none.
 */
function findBalancedParen(str, openPos) {
    let open = 0;
    let at = openPos;
    while (at < str.length) {
        switch (str[at]) {
            case '(':
                open++;
                break;
            case ')':
                open--;
                if (open === 0) {
                    return at;
                }
                break;
            case '"': {
                const closing = findClosingQuote(str, at);
                if (closing >= 0) {
                    at = closing; // jump to the closing quote; the increment below steps past it
                }
                break;
            }
            case "'": {
                const closing = str.indexOf("'", at + 1);
                if (closing >= 0) {
                    at = closing;
                }
                break;
            }
        }
        at++;
    }
    return -1;
}
375
/**
 * Find the double quote that closes the one at `openPos`.
 *
 * A backslash escapes the character that follows it, so `\"` does not end
 * the string.
 *
 * @param str - Text to scan.
 * @param openPos - Index of the opening quote; scanning starts just after it.
 * @returns Index of the closing `"`, or -1 if the string is unterminated.
 */
function findClosingQuote(str, openPos) {
    let at = openPos + 1;
    while (at < str.length) {
        const ch = str[at];
        if (ch === '"') {
            return at;
        }
        // Step over an escaped character together with its backslash.
        at += ch === '\\' ? 2 : 1;
    }
    return -1;
}
386
/**
 * Collect every `@name` / `@name(args)` attribute found in `content`.
 *
 * Argument text is taken up to the balancing `)` and kept as one raw string.
 * An `@` that is not followed by a word character is skipped. When the
 * parentheses are unbalanced the attribute is still recorded, with `args`
 * left undefined.
 *
 * @param content - Text to scan.
 * @returns Attributes in order of appearance, each `{ name, args }`.
 */
function parseAttributesBalanced(content) {
    const found = [];
    const limit = content.length;
    let cursor = 0;
    for (let at = content.indexOf('@', cursor); at >= 0; at = content.indexOf('@', cursor)) {
        const nameFrom = at + 1;
        cursor = nameFrom;
        while (cursor < limit && /\w/.test(content[cursor])) {
            cursor++;
        }
        const name = content.slice(nameFrom, cursor);
        if (name === '') {
            continue;
        }
        let args;
        if (cursor < limit && content[cursor] === '(') {
            const closeParen = findBalancedParen(content, cursor);
            if (closeParen >= 0) {
                args = content.slice(cursor + 1, closeParen);
                cursor = closeParen + 1;
            }
        }
        found.push({ name, args });
    }
    return found;
}
413
/**
 * Parse the text of an indented (nested) list item.
 *
 * `key: value` items yield `{ key, value, raw_content }` with the value
 * trimmed; anything else yields `{ raw_content }` only.
 *
 * @param content - List item text without the leading `- `.
 */
function parseNestedItem(content) {
    const pair = /^([\w]+)\s*:\s*(.+)$/.exec(content);
    if (pair === null) {
        // Value-only nested item
        return { raw_content: content };
    }
    const [, key, rawValue] = pair;
    return { key, value: rawValue.trim(), raw_content: content };
}
@@ -0,0 +1,9 @@
1
+ import type { Token, ParsedFile } from './types.js';
2
+ /**
3
+ * Parse M3L content string into a ParsedFile AST.
+ *
+ * @param content - M3L source text to parse.
+ * @param file - NOTE(review): presumably the name or path of the file the
+ *   content came from; the implementation is not visible here, so confirm.
+ * @returns The parsed content as a ParsedFile.
4
+ */
5
+ export declare function parseString(content: string, file: string): ParsedFile;
6
+ /**
7
+ * Parse a token sequence into a ParsedFile AST.
+ *
+ * @param tokens - Token sequence to parse. NOTE(review): presumably the
+ *   output of the lexer; confirm against the implementation.
+ * @param file - NOTE(review): presumably the name or path of the file the
+ *   tokens came from; the implementation is not visible here, so confirm.
+ * @returns The parsed tokens as a ParsedFile.
8
+ */
9
+ export declare function parseTokens(tokens: Token[], file: string): ParsedFile;