recipe-tmlanguage 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,466 @@
1
+ #!/usr/bin/env node
2
+ import { createRequire } from "node:module";
3
+ import { mkdirSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
4
+ import { dirname, resolve } from "node:path";
5
+ import { cwd, exit } from "node:process";
6
+ import { fileURLToPath } from "node:url";
7
+ import { cli, command, flag } from "dreamcli";
8
+ import { COUNTERS, NUMBER_WORDS, PERIODS, PERIOD_PLURALS } from "tree-sitter-recipe/grammar/dutch";
9
+ import { COMPOUNDING, COMPOUNDING_MULTIWORD, CONDITIONAL, CONDITIONAL_MULTIWORD, DISPENSING, DISPENSING_MULTIWORD, FORMS, FORMS_MULTIWORD, FREQUENCY, ROUTE, ROUTE_MULTIWORD, TIMING, TIMING_MULTIWORD, WARNING } from "tree-sitter-recipe/grammar/latin";
10
+ import { UNITS } from "tree-sitter-recipe/grammar/units";
11
+ //#region src/grammar.ts
12
+ /**
13
+ * @file Pure grammar builder — imports the tree-sitter-recipe vocabulary and
14
+ * compiles it into a TextMate grammar object. No filesystem I/O; the CLI
15
+ * handles serialization and writes.
16
+ *
17
+ * Scopes are standard TextMate names with a `.recipe` suffix so themes paint
18
+ * recipe blocks without a custom theme shipment.
19
+ */
20
/**
 * TextMate scope name for each token category the grammar emits.
 *
 * Every value is a standard TextMate scope with a `.recipe` suffix, so stock
 * themes colour recipe blocks without shipping a dedicated theme.
 */
const SCOPE = {
  // Section markers that open an R/, D/ (or Da/), or S/ section.
  rxMarker: "keyword.control.directive.rx.recipe",
  dispenseMarker: "keyword.control.directive.dispense.recipe",
  signaMarker: "keyword.control.directive.signa.recipe",

  // Vocabulary-driven abbreviations.
  frequency: "keyword.other.frequency.recipe",
  timing: "keyword.other.timing.recipe",
  route: "support.function.route.recipe",
  dispensing: "entity.other.attribute-name.recipe",
  warning: "invalid.illegal.warning.recipe",
  form: "storage.type.form.recipe",
  compounding: "keyword.operator.compounding.recipe",
  conditional: "keyword.control.conditional.recipe",

  // Operators matched by dedicated patterns (`ad` before a number, `d.t.d.`).
  fillMarker: "keyword.operator.fill.recipe",
  dtdKeyword: "keyword.operator.dtd.recipe",

  // Quantities.
  number: "constant.numeric.recipe",
  unit: "support.type.unit.recipe",

  // Comments: `#`, `#!`, `/* */`, `/** */`.
  lineComment: "comment.line.number-sign.recipe",
  docCommentLine: "comment.line.documentation.recipe",
  blockComment: "comment.block.recipe",
  docCommentBlock: "comment.block.documentation.recipe",

  punctuation: "punctuation.separator.recipe",

  // Fallback scope for plain words, one per section kind.
  ingredientWord: "variable.other.ingredient.recipe",
  signaWord: "string.unquoted.signa.recipe",
  dispenseWord: "variable.other.dispense.recipe"
};
45
/** Every character that has special meaning inside a regular expression. */
const REGEX_METACHARS = /[.*+?^${}()|[\]\\]/g;

/** Backslash-escape all regex metacharacters in `s` so it matches literally. */
const escapeRegex = (s) => s.replace(REGEX_METACHARS, "\\$&");

/**
 * De-duplicate `items` and order them longest first, so that in the resulting
 * alternation a longer entry is tried before any shorter entry that prefixes it.
 */
const uniqueLongestFirst = (items) => [...new Set(items)].sort((a, b) => b.length - a.length);

/** Build a literal alternation (`a|b|c`) from single-token vocabulary entries. */
const alt = (items) => uniqueLongestFirst(items).map(escapeRegex).join("|");

/**
 * Build an alternation from multi-word vocabulary entries. Each entry is
 * escaped with the same rules as {@link alt} (previously only `.` was
 * escaped), then every whitespace run is relaxed to `\s+` so any amount of
 * spacing between the words matches.
 */
const altMultiword = (items) =>
  uniqueLongestFirst(items)
    .map((s) => escapeRegex(s).replace(/\s+/g, "\\s+"))
    .join("|");

/**
 * Wrap `pattern` in abbreviation-aware boundaries: the match may not be
 * preceded or followed by a word character or a dot.
 */
const wb = (pattern) => `(?<![\\w.])(?:${pattern})(?![\\w.])`;
50
/**
 * Compile the imported tree-sitter-recipe vocabulary into a TextMate grammar.
 *
 * Performs no I/O: it only reads the module-level vocabulary and scope
 * constants.
 *
 * @returns `{ grammar, stats }` — `grammar` is the TextMate grammar object and
 *   `stats` holds the recursive top-level pattern count plus the size of each
 *   vocabulary list.
 */
function buildGrammar() {
  // Rule factories. Property order is deliberate: it is the order in which
  // JSON.stringify emits keys, so it fixes the layout of the generated file.
  const scoped = (match, name) => ({ match, name });
  const captured = (match, first, second) => ({
    match,
    captures: {
      "1": { name: first },
      "2": { name: second }
    }
  });
  const sizes = (single, multi) => ({ single: single.length, multi: multi.length });

  // Number immediately followed by a known unit, e.g. a dose.
  const doseMatch = captured(`(\\d+(?:[.,]\\d+)?)\\s*(${alt(UNITS)})(?![A-Za-zÀ-ÿ])`, SCOPE.number, SCOPE.unit);
  const bareNumber = scoped("\\d+(?:[.,]\\d+)?", SCOPE.number);
  // Single digit followed by `dd`, with optional whitespace in between.
  const compactFrequency = scoped("[1-9]\\s*dd(?![A-Za-zÀ-ÿ0-9])", SCOPE.frequency);
  // `ad` only counts as the fill marker when a number follows.
  const fillTo = scoped("\\bad\\b(?=\\s+\\d)", SCOPE.fillMarker);
  const dtdDirective = captured("(?i)(?<![\\w.])(d\\.?t\\.?d\\.?)(?:\\s+(no))?(?=\\s+\\d)", SCOPE.dtdKeyword, SCOPE.dtdKeyword);
  const warningAbbrev = scoped(wb(alt(WARNING)), SCOPE.warning);

  // Multi-word abbreviations come first so they are tried before the
  // single-word lists.
  const multiwordVocab = [
    [TIMING_MULTIWORD, SCOPE.timing],
    [ROUTE_MULTIWORD, SCOPE.route],
    [DISPENSING_MULTIWORD, SCOPE.dispensing],
    [FORMS_MULTIWORD, SCOPE.form],
    [COMPOUNDING_MULTIWORD, SCOPE.compounding],
    [CONDITIONAL_MULTIWORD, SCOPE.conditional]
  ];
  const singleWordVocab = [
    [FREQUENCY, SCOPE.frequency],
    [TIMING, SCOPE.timing],
    [ROUTE, SCOPE.route],
    [DISPENSING, SCOPE.dispensing],
    [FORMS, SCOPE.form],
    [COMPOUNDING, SCOPE.compounding],
    [CONDITIONAL, SCOPE.conditional]
  ];
  const latinAbbrevs = [
    ...multiwordVocab.map(([words, scope]) => scoped(wb(altMultiword(words)), scope)),
    ...singleWordVocab.map(([words, scope]) => scoped(wb(alt(words)), scope))
  ];

  const punctuation = scoped("[-.,;:()]", SCOPE.punctuation);

  // Documentation variants precede the plain ones because `/**` and `#!`
  // would otherwise be claimed by the `/*` and `#` rules.
  const comments = [
    { name: SCOPE.docCommentBlock, begin: "/\\*\\*", end: "\\*/" },
    { name: SCOPE.blockComment, begin: "/\\*", end: "\\*/" },
    { name: SCOPE.docCommentLine, match: "#!.*$" },
    { name: SCOPE.lineComment, match: "#.*$" }
  ];

  const period = alt(PERIODS);
  const anyPeriod = alt([...PERIOD_PLURALS, ...PERIODS]);
  const counters = alt(COUNTERS);
  const numberWords = alt(NUMBER_WORDS);
  const dutchFrequency = [
    scoped(`(?i)\\bom[ \\t]+de(?:[ \\t]+andere)?(?:[ \\t]+\\d+)?[ \\t]+(?:${anyPeriod})\\b`, SCOPE.frequency),
    scoped(`(?i)\\b\\d+[ \\t]*(?:${counters})[ \\t]+(?:per[ \\t]+(?:${period})|daags)\\b`, SCOPE.frequency),
    scoped(`(?i)\\b(?:${numberWords})[ \\t]*maal(?:[ \\t]+(?:daags|per[ \\t]+(?:${period})))?\\b`, SCOPE.frequency)
  ];

  // Patterns shared by all three section kinds, in matching priority order.
  const sharedAtoms = [
    ...comments,
    warningAbbrev,
    dtdDirective,
    fillTo,
    compactFrequency,
    ...dutchFrequency,
    doseMatch,
    ...latinAbbrevs,
    bareNumber,
    punctuation
  ];

  /**
   * A section runs until the next literal marker (R/, Da/, D/, S/) or end of
   * input. The trailing slash matters: without it, `s\b` inside `s.o.s.`
   * would close a signa section early, because `.` is a non-word character.
   */
  const nextSection = "(?i)(?=R/|Da?/|S/)|\\z";

  function makeSection(begin, marker, wordScope) {
    // Third scope segment names the section, e.g. "ingredient" or "signa".
    const sectionKind = wordScope.split(".")[2] ?? "unknown";
    const plainWord = scoped("[A-Za-zÀ-ÿ][A-Za-zÀ-ÿ0-9\\-]*", wordScope);
    return {
      name: `meta.section.${sectionKind}.recipe`,
      begin,
      beginCaptures: { "0": { name: marker } },
      end: nextSection,
      patterns: [...sharedAtoms, plainWord]
    };
  }

  const rxSection = makeSection("(?i)R/", SCOPE.rxMarker, SCOPE.ingredientWord);
  const dispenseSection = makeSection("(?i)Da?/", SCOPE.dispenseMarker, SCOPE.dispenseWord);
  const signaSection = makeSection("(?i)S/", SCOPE.signaMarker, SCOPE.signaWord);

  const grammar = {
    $schema: "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json",
    name: "Recipe",
    scopeName: "source.recipe",
    fileTypes: ["recipe"],
    patterns: [...comments, rxSection, dispenseSection, signaSection, warningAbbrev],
    repository: {
      comments: { patterns: comments },
      "shared-atoms": { patterns: sharedAtoms }
    }
  };

  const vocab = {
    frequency: FREQUENCY.length,
    timing: sizes(TIMING, TIMING_MULTIWORD),
    route: sizes(ROUTE, ROUTE_MULTIWORD),
    dispensing: sizes(DISPENSING, DISPENSING_MULTIWORD),
    forms: sizes(FORMS, FORMS_MULTIWORD),
    compounding: sizes(COMPOUNDING, COMPOUNDING_MULTIWORD),
    conditional: sizes(CONDITIONAL, CONDITIONAL_MULTIWORD),
    warning: WARNING.length,
    units: UNITS.length
  };

  return {
    grammar,
    stats: {
      topLevelPatterns: countPatterns(grammar.patterns),
      vocab
    }
  };
}
258
/**
 * Count every rule in `patterns`, descending into each rule's own `patterns`
 * list when it has a truthy one.
 *
 * @param patterns Iterable of grammar rules.
 * @returns Total number of rules at all nesting levels.
 */
function countPatterns(patterns) {
  return Array.from(patterns).reduce((total, rule) => {
    const nested = "patterns" in rule && rule.patterns ? countPatterns(rule.patterns) : 0;
    return total + 1 + nested;
  }, 0);
}
266
/**
 * Serialize a grammar object to pretty-printed JSON with a trailing newline.
 *
 * @param g Grammar object to serialize.
 * @param indent `"tab"` for tab indentation, otherwise any value accepted as
 *   the `space` argument of `JSON.stringify` (the CLI passes a number of spaces).
 * @returns The JSON text followed by `\n`.
 */
function serializeGrammar(g, indent) {
  // Spelled as an escape so the tab cannot be silently converted to a space
  // by an editor, formatter, or copy/paste.
  const space = indent === "tab" ? "\t" : indent;
  return `${JSON.stringify(g, null, space)}\n`;
}
269
+ //#endregion
270
+ //#region src/verifier.ts
271
+ /**
272
+ * @file Pure verifier — tokenizes tree-sitter-recipe's own highlight fixtures
273
+ * with the generated TextMate grammar and reports whether each caret assertion
274
+ * lands on a matching scope.
275
+ *
276
+ * No CLI concerns here; the caller supplies paths and decides how to present
277
+ * the result (text table / JSON / exit code).
278
+ */
279
+ const require = createRequire(import.meta.url);
280
+ const oniguruma = require("vscode-oniguruma");
281
+ const { parseRawGrammar, Registry } = require("vscode-textmate");
282
/**
 * Maps a capture name used in the highlight fixtures' assertions to the
 * TextMate scope prefix that a token must carry for the assertion to pass
 * (the verifier compares with `startsWith`).
 */
const CAPTURE_EXPECTS = {
  // keyword family
  "keyword.directive": "keyword.control.directive",
  "keyword.repeat": "keyword.other.frequency",
  "keyword.error": "invalid.illegal.warning",
  "keyword.operator": "keyword.operator",
  "keyword.conditional": "keyword.control.conditional",
  keyword: "keyword.other.timing",

  // vocabulary categories
  "function.macro": "support.function.route",
  attribute: "entity.other.attribute-name",
  type: "storage.type.form",
  "type.builtin": "support.type.unit",

  // literals and free text
  number: "constant.numeric",
  variable: "variable.other.ingredient",
  string: "string.unquoted.signa",

  // both comment captures accept any comment scope
  comment: "comment",
  "comment.documentation": "comment",

  "punctuation.delimiter": "punctuation.separator"
};
300
/** Assertion comment: `# <- capture.name` or `# ^^^ capture.name`. */
const ASSERT_RE = /^\s*#\s*(<-|\^+)\s+([\w.]+)\s*$/;
/** Any line whose first non-blank character is `#`. */
const COMMENT_ONLY_RE = /^\s*#/;

/**
 * Split a highlight fixture into the source text to tokenize and the list of
 * assertions to check against it.
 *
 * Every line that starts with `#` (after optional whitespace) is removed from
 * the source. Those that match {@link ASSERT_RE} become assertions against the
 * nearest preceding source line; assertions that appear before any source
 * line are dropped.
 *
 * @param content Raw fixture text (LF or CRLF line endings).
 * @param name Fixture name, copied into each assertion for reporting.
 * @returns `{ source, asserts }` — `source` is the remaining lines joined with
 *   `\n`; each assert is `{ fixture, targetLine, col, capture }` with a
 *   1-based `targetLine` into `source` and a 0-based `col`.
 */
function parseFixture(content, name) {
  const sourceLines = [];
  const asserts = [];
  for (const raw of content.split(/\r?\n/)) {
    if (!COMMENT_ONLY_RE.test(raw)) {
      sourceLines.push(raw);
      continue;
    }
    const match = ASSERT_RE.exec(raw);
    if (!match) continue;
    const [, kind, capture] = match;
    // The assertion targets the most recent source line seen so far.
    const targetLine = sourceLines.length;
    if (targetLine === 0) continue;
    // An arrow points at the column where the comment marker itself sits
    // (not always column 0: the comment may be indented); carets point at
    // the column of the first caret.
    const col = kind === "<-" ? raw.indexOf("#") : raw.indexOf("^");
    asserts.push({
      fixture: name,
      targetLine,
      col,
      capture
    });
  }
  return {
    source: sourceLines.join("\n"),
    asserts
  };
}
333
/**
 * Tokenize every `.recipe` fixture in a directory with the given TextMate
 * grammar and check each fixture assertion against the resulting scopes.
 *
 * @param opts.onigWasmPath Path to the oniguruma WASM binary.
 * @param opts.grammarPath Path to the grammar JSON to load.
 * @param opts.fixturesDir Directory scanned (non-recursively) for `*.recipe` files.
 * @returns `{ pass, total, failures }`; each failure is
 *   `{ fixture, line, col, capture, got }` where `got` is the scope list of the
 *   token found at that position, or `null` when no token was found.
 */
async function verify(opts) {
  const wasmBin = readFileSync(opts.onigWasmPath);
  // A Buffer can be a view into a larger (pooled) ArrayBuffer, so hand over
  // only the bytes that belong to this file rather than `wasmBin.buffer` whole.
  const wasmBytes = wasmBin.buffer.slice(wasmBin.byteOffset, wasmBin.byteOffset + wasmBin.byteLength);
  await oniguruma.loadWASM(wasmBytes);
  const onigLib = Promise.resolve({
    createOnigScanner: (patterns) => new oniguruma.OnigScanner(patterns),
    createOnigString: (s) => new oniguruma.OnigString(s)
  });
  const rawGrammar = parseRawGrammar(readFileSync(opts.grammarPath, "utf-8"), opts.grammarPath);
  const grammar = await new Registry({
    onigLib,
    // The grammar includes no other grammars, so nothing is loaded on demand.
    loadGrammar: async () => null
  }).addGrammar(rawGrammar);
  const result = {
    pass: 0,
    total: 0,
    failures: []
  };
  // Sorted so the failure list is stable across runs and platforms.
  for (const name of readdirSync(opts.fixturesDir).sort()) {
    if (!name.endsWith(".recipe")) continue;
    const { source, asserts } = parseFixture(readFileSync(resolve(opts.fixturesDir, name), "utf-8"), name);

    // Tokenize line by line, threading the rule stack so multi-line
    // constructs (sections, block comments) carry over.
    let ruleStack = null;
    const perLine = [];
    for (const line of source.split("\n")) {
      const r = grammar.tokenizeLine(line, ruleStack);
      perLine.push(r.tokens.map((t) => ({
        start: t.startIndex,
        end: t.endIndex,
        scopes: [...t.scopes]
      })));
      ruleStack = r.ruleStack;
    }

    for (const a of asserts) {
      result.total += 1;
      const tokens = perLine[a.targetLine - 1];
      // Prefer the token that contains the column; otherwise accept a token
      // that ends exactly at it.
      const hit = tokens?.find((t) => a.col >= t.start && a.col < t.end) ?? tokens?.find((t) => a.col === t.end);
      // Unknown capture names have no expected prefix and are counted as failures.
      const expected = CAPTURE_EXPECTS[a.capture];
      if (!!(hit && expected && hit.scopes.some((s) => s.startsWith(expected)))) result.pass += 1;
      else result.failures.push({
        fixture: a.fixture,
        line: a.targetLine,
        col: a.col,
        capture: a.capture,
        got: hit ? hit.scopes : null
      });
    }
  }
  return result;
}
382
+ //#endregion
383
+ //#region package.json
384
// Fields inlined from package.json; they are passed to the CLI builder below.
const version = "0.3.3";
const homepage = "https://github.com/kjanat/recipe-tmlanguage#recipe-tmlanguage";
const repository = {
  type: "git",
  url: "git+https://github.com/kjanat/recipe-tmlanguage.git"
};
390
+ //#endregion
391
+ //#region bin/recipe-tmlang.ts
392
+ /**
393
+ * recipe-tmlang — TextMate grammar generator & verifier for recipe-tmlanguage.
394
+ *
395
+ * Subcommands
396
+ * - generate: Build dist/recipe.tmLanguage.json from the tree-sitter-recipe vocab.
397
+ * - verify: Tokenize tree-sitter-recipe's highlight fixtures and assert scopes.
398
+ *
399
+ * Zero manual argparse — argument parsing, help, and completions all come from
400
+ * {@link https://github.com/kjanat/dreamcli | DreamCLI}. `--json` is a DreamCLI built-in;
401
+ * we branch on {@linkcode Out.jsonMode}.
402
+ */
403
// Default grammar location: `recipe.tmLanguage.json` in the parent of this
// module's directory. Built with `resolve` segments rather than string
// concatenation so the path uses the platform's separator throughout.
const DEFAULT_OUT = resolve(import.meta.dirname, "..", "recipe.tmLanguage.json");
// Highlight fixtures shipped inside the installed tree-sitter-recipe package.
const DEFAULT_FIXTURES_DIR = resolve(dirname(fileURLToPath(import.meta.resolve("tree-sitter-recipe/package.json"))), "test/highlight");
// WASM binary shipped inside the installed vscode-oniguruma package.
const DEFAULT_ONIG_WASM = fileURLToPath(import.meta.resolve("vscode-oniguruma/release/onig.wasm"));
406
/**
 * Convert the `--indent` flag value into the form `serializeGrammar` expects:
 * `"tab"` is passed through, anything else is converted with `Number`.
 */
const indentOf = (raw) => {
  if (raw === "tab") return "tab";
  return Number(raw);
};
407
/**
 * `generate` subcommand: build the grammar, write it to `--out` (creating
 * parent directories as needed), then report what was written — as JSON in
 * JSON mode, as text otherwise, or not at all with `--quiet`.
 */
const generate = command("generate")
  .description("Build the TextMate grammar from the tree-sitter-recipe vocabulary")
  .flag("out", flag.string().alias("o").default(DEFAULT_OUT).describe("Output JSON path"))
  .flag("indent", flag.enum(["tab", "2", "4"]).default("tab").describe("JSON indent"))
  .flag("quiet", flag.boolean().alias("q").default(false).describe("Suppress stats on success"))
  .action(({ flags, out }) => {
    const { grammar, stats } = buildGrammar();
    const serialized = serializeGrammar(grammar, indentOf(flags.indent));
    const outAbs = resolve(cwd(), flags.out);
    mkdirSync(dirname(outAbs), { recursive: true });
    writeFileSync(outAbs, serialized);
    // The file is written as UTF-8 and the grammar contains non-ASCII
    // characters, so the on-disk size differs from `serialized.length`
    // (which counts UTF-16 code units). Report the real byte count.
    const bytes = Buffer.byteLength(serialized, "utf8");
    if (out.jsonMode) {
      out.json({
        ok: true,
        outPath: outAbs,
        bytes,
        stats
      });
      return;
    }
    if (flags.quiet) return;
    out.log(`wrote ${outAbs}`);
    out.log(` ${stats.topLevelPatterns} top-level patterns · ${bytes} bytes`);
    const v = stats.vocab;
    out.log(` vocab: ${v.frequency} frequency · ${v.timing.single}+${v.timing.multi} timing · ${v.route.single}+${v.route.multi} route · ${v.dispensing.single}+${v.dispensing.multi} dispensing · ${v.forms.single}+${v.forms.multi} forms · ${v.compounding.single}+${v.compounding.multi} compounding · ${v.conditional.single}+${v.conditional.multi} conditional · ${v.warning} warning · ${v.units} units`);
  });
432
/**
 * `verify` subcommand: run the verifier over the fixtures and report the
 * result. Sets a non-zero exit code whenever at least one assertion fails.
 */
const verifyCmd = command("verify")
  .description("Tokenize tree-sitter-recipe highlight fixtures and assert scope matches")
  .flag("grammar", flag.string().alias("g").default(DEFAULT_OUT).describe("Path to .tmLanguage.json"))
  .flag("fixtures", flag.string().alias("f").default(DEFAULT_FIXTURES_DIR).describe("Directory of .recipe fixtures"))
  .flag("onig-wasm", flag.string().default(DEFAULT_ONIG_WASM).describe("Path to oniguruma WASM"))
  .flag("max-failures", flag.number().default(40).describe("Max failures to print (0 = all)"))
  .action(async ({ flags, out }) => {
    const result = await verify({
      grammarPath: resolve(cwd(), flags.grammar),
      fixturesDir: resolve(cwd(), flags.fixtures),
      onigWasmPath: resolve(cwd(), flags["onig-wasm"])
    });
    const failuresLen = result.failures.length;
    if (out.jsonMode) {
      out.json(result);
      if (failuresLen > 0) {
        out.setExitCode(1);
        // Pass the code explicitly: a bare exit() falls back to
        // process.exitCode, which is only non-zero if setExitCode has
        // already written it.
        // NOTE(review): confirm whether the hard exit is still needed in
        // JSON mode; the text branch below relies on setExitCode alone.
        exit(1);
      }
      return;
    }
    out.log(`${result.pass} / ${result.total} assertions pass`);
    if (failuresLen === 0) return;
    out.log("");
    out.log("── failures ──");
    // 0 means "print all"; negative values are treated the same way instead
    // of being passed to slice(), where they would drop entries from the end.
    const requested = Math.trunc(flags["max-failures"]);
    const limit = requested > 0 ? requested : failuresLen;
    for (const f of result.failures.slice(0, limit)) {
      const gotStr = f.got ? f.got.filter((s) => s !== "source.recipe").join(" · ") || "(root only)" : "(no token)";
      out.log(` ${f.fixture}:${f.line}:${f.col} expected ${f.capture} got [${gotStr}]`);
    }
    if (failuresLen > limit) out.log(` … +${failuresLen - limit} more`);
    out.setExitCode(1);
  });
459
// Root CLI: package metadata, both subcommands, and shell completions.
const packageMeta = {
  repository,
  homepage,
  version
};
const app = cli("recipe-tmlang")
  .packageJson(packageMeta)
  .links()
  .description("TextMate grammar generator & verifier for the recipe DSL")
  .command(generate)
  .command(verifyCmd)
  .completions();

// Only run when this module is the entry point, not when it is imported.
if (import.meta.main) {
  app.run();
}
465
+ //#endregion
466
+ export { app };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "recipe-tmlanguage",
3
- "version": "0.3.2",
3
+ "version": "0.3.3",
4
4
  "description": "TextMate grammar for the recipe (.recipe) pharmacological notation language.",
5
5
  "keywords": [
6
6
  "dreamcli",