npm - recipe-tmlanguage - Versions diffs - 0.3.0 - Mend

recipe-tmlanguage 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/src/grammar.ts ADDED Viewed

@@ -0,0 +1,332 @@
+/**
+ * @file Pure grammar builder — imports the tree-sitter-recipe vocabulary and
+ * compiles it into a TextMate grammar object. No filesystem I/O; the CLI
+ * handles serialization and writes.
+ *
+ * Scopes are standard TextMate names with a `.recipe` suffix so themes paint
+ * recipe blocks without a custom theme shipment.
+ */
+import { COUNTERS, NUMBER_WORDS, PERIOD_PLURALS, PERIODS } from "tree-sitter-recipe/grammar/dutch";
+import {
+	COMPOUNDING,
+	COMPOUNDING_MULTIWORD,
+	CONDITIONAL,
+	CONDITIONAL_MULTIWORD,
+	DISPENSING,
+	DISPENSING_MULTIWORD,
+	FORMS,
+	FORMS_MULTIWORD,
+	FREQUENCY,
+	ROUTE,
+	ROUTE_MULTIWORD,
+	TIMING,
+	TIMING_MULTIWORD,
+	WARNING,
+} from "tree-sitter-recipe/grammar/latin";
+import { UNITS } from "tree-sitter-recipe/grammar/units";
+// scope map
+/**
+ * Maps each token category of the grammar to the TextMate scope name it is
+ * painted with. Every value ends in `.recipe`. Declared `as const` so each
+ * entry keeps its literal string type.
+ */
+export const SCOPE = {
+	rxMarker: "keyword.control.directive.rx.recipe",
+	dispenseMarker: "keyword.control.directive.dispense.recipe",
+	signaMarker: "keyword.control.directive.signa.recipe",
+	frequency: "keyword.other.frequency.recipe",
+	timing: "keyword.other.timing.recipe",
+	route: "support.function.route.recipe",
+	dispensing: "entity.other.attribute-name.recipe",
+	warning: "invalid.illegal.warning.recipe",
+	form: "storage.type.form.recipe",
+	compounding: "keyword.operator.compounding.recipe",
+	conditional: "keyword.control.conditional.recipe",
+	fillMarker: "keyword.operator.fill.recipe",
+	dtdKeyword: "keyword.operator.dtd.recipe",
+	number: "constant.numeric.recipe",
+	unit: "support.type.unit.recipe",
+	lineComment: "comment.line.number-sign.recipe",
+	docCommentLine: "comment.line.documentation.recipe",
+	blockComment: "comment.block.recipe",
+	docCommentBlock: "comment.block.documentation.recipe",
+	punctuation: "punctuation.separator.recipe",
+	ingredientWord: "variable.other.ingredient.recipe",
+	signaWord: "string.unquoted.signa.recipe",
+	dispenseWord: "variable.other.dispense.recipe",
+} as const;
+// regex helpers
+// Oniguruma (the TextMate regex engine) takes the first alternative that
+// matches, whereas tree-sitter takes the longest one. Alternatives are
+// therefore ordered longest-first before being joined with `|`, so a longer
+// entry is always tried before any shorter one.
+const REGEX_METACHARS = /[.*+?^${}()|[\]\\]/g;
+/** Backslash-escapes every regex metacharacter in `s` so it matches literally. */
+const escapeRegex = (s: string): string => {
+	return s.replace(REGEX_METACHARS, "\\$&");
+};
+/**
+ * Builds a regex alternation from literal strings: duplicates removed,
+ * longest entries first, each entry escaped.
+ */
+const alt = (items: readonly string[]): string => {
+	const unique = Array.from(new Set(items));
+	unique.sort((left, right) => right.length - left.length);
+	const escaped = unique.map((item) => escapeRegex(item));
+	return escaped.join("|");
+};
+/**
+ * Builds a regex alternation from multi-word phrases: duplicates removed,
+ * longest phrases first. Each phrase is escaped with the same metacharacter
+ * set that `alt` uses, and every run of whitespace becomes `\s+` so the words
+ * may be separated by any amount of whitespace.
+ *
+ * Escaping only `.` would let a phrase containing e.g. `(`, `+` or `?`
+ * produce a broken or over-matching pattern.
+ */
+const altMultiword = (items: readonly string[]): string =>
+	[...new Set(items)]
+		.sort((a, b) => b.length - a.length)
+		// Escape first: escapeRegex leaves whitespace untouched, so the `\s+`
+		// inserted afterwards is not itself escaped.
+		.map((s) => escapeRegex(s).replace(/\s+/g, "\\s+"))
+		.join("|");
+// `\b` cannot delimit dotted abbreviations: `.` is a non-word character, so
+// `a.c.` would still match inside `a.c.e.`. These lookarounds count `.` as
+// part of the token instead.
+const wb = (pattern: string): string => {
+	return `(?<![\\w.])(?:${pattern})(?![\\w.])`;
+};
+// types
+/** What a single capture group is painted with: a scope name and/or nested patterns. */
+type Capture = { name?: string; patterns?: Pattern[] };
+/** Capture definitions keyed by capture-group number written as a string (e.g. `"1"`). */
+type Captures = Record<string, Capture>;
+/**
+ * One grammar rule, in one of three shapes: an `include` reference, a
+ * single-regex `match` rule, or a `begin`/`end` region rule that may hold
+ * nested patterns.
+ */
+export type Pattern =
+	| { include: string }
+	| { name?: string; match: string; captures?: Captures }
+	| {
+		name?: string;
+		begin: string;
+		end: string;
+		beginCaptures?: Captures;
+		endCaptures?: Captures;
+		patterns?: Pattern[];
+		contentName?: string;
+	};
+/** Top-level shape of the grammar object produced by `buildGrammar`. */
+export type Grammar = {
+	$schema?: string;
+	name: string;
+	scopeName: string;
+	fileTypes: string[];
+	patterns: Pattern[];
+	repository: Record<string, { patterns: Pattern[] } | Pattern>;
+};
+/**
+ * Entry counts of the imported vocabulary lists. Categories that have both a
+ * single-token list and a `*_MULTIWORD` list report the two lengths separately.
+ */
+export type VocabStats = {
+	frequency: number;
+	timing: { single: number; multi: number };
+	route: { single: number; multi: number };
+	dispensing: { single: number; multi: number };
+	forms: { single: number; multi: number };
+	compounding: { single: number; multi: number };
+	conditional: { single: number; multi: number };
+	warning: number;
+	units: number;
+};
+/** Summary figures reported alongside a built grammar. */
+export type BuildStats = {
+	// Count of the grammar's top-level patterns plus everything nested under them.
+	topLevelPatterns: number;
+	vocab: VocabStats;
+};
+/** Return value of `buildGrammar`: the grammar plus its statistics. */
+export type BuildResult = {
+	grammar: Grammar;
+	stats: BuildStats;
+};
+// grammar assembly
+/**
+ * Assembles the Recipe TextMate grammar from the imported vocabulary lists.
+ *
+ * The top level holds the comment rules, the three section rules (`R/`,
+ * `D/` or `Da/`, `S/`) and the warning abbreviations. Each section applies
+ * the shared atoms in first-match priority order and falls back to a
+ * section-specific word scope for any remaining word.
+ *
+ * @returns The grammar object together with vocabulary and pattern counts.
+ */
+export function buildGrammar(): BuildResult {
+	// Dose must come before bare number, else "50" matches first and leaves "mg" to fall to the word fallback.
+	const doseMatch: Pattern = {
+		match: `(\\d+(?:[.,]\\d+)?)\\s*(${alt(UNITS)})(?![A-Za-zÀ-ÿ])`,
+		captures: {
+			"1": { name: SCOPE.number },
+			"2": { name: SCOPE.unit },
+		},
+	};
+	const bareNumber: Pattern = {
+		match: "\\d+(?:[.,]\\d+)?",
+		name: SCOPE.number,
+	};
+	const compactFrequency: Pattern = {
+		match: "[1-9]\\s*dd(?![A-Za-zÀ-ÿ0-9])",
+		name: SCOPE.frequency,
+	};
+	// `ad` is a word too; only paint as fill-marker when followed by digit.
+	const fillTo: Pattern = {
+		match: "\\bad\\b(?=\\s+\\d)",
+		name: SCOPE.fillMarker,
+	};
+	const dtdDirective: Pattern = {
+		match: "(?i)(?<![\\w.])(d\\.?t\\.?d\\.?)(?:\\s+(no))?(?=\\s+\\d)",
+		captures: {
+			"1": { name: SCOPE.dtdKeyword },
+			"2": { name: SCOPE.dtdKeyword },
+		},
+	};
+	// Case-sensitive — CITO/cito/Cito are separate vocab entries. Painted red.
+	const warningAbbrev: Pattern = {
+		match: wb(alt(WARNING)),
+		name: SCOPE.warning,
+	};
+	// Multiword first (longer match wins), then dotted singles. All word-bounded.
+	const latinAbbrevs: Pattern[] = [
+		{ match: wb(altMultiword(TIMING_MULTIWORD)), name: SCOPE.timing },
+		{ match: wb(altMultiword(ROUTE_MULTIWORD)), name: SCOPE.route },
+		{ match: wb(altMultiword(DISPENSING_MULTIWORD)), name: SCOPE.dispensing },
+		{ match: wb(altMultiword(FORMS_MULTIWORD)), name: SCOPE.form },
+		{ match: wb(altMultiword(COMPOUNDING_MULTIWORD)), name: SCOPE.compounding },
+		{ match: wb(altMultiword(CONDITIONAL_MULTIWORD)), name: SCOPE.conditional },
+		{ match: wb(alt(FREQUENCY)), name: SCOPE.frequency },
+		{ match: wb(alt(TIMING)), name: SCOPE.timing },
+		{ match: wb(alt(ROUTE)), name: SCOPE.route },
+		{ match: wb(alt(DISPENSING)), name: SCOPE.dispensing },
+		{ match: wb(alt(FORMS)), name: SCOPE.form },
+		{ match: wb(alt(COMPOUNDING)), name: SCOPE.compounding },
+		{ match: wb(alt(CONDITIONAL)), name: SCOPE.conditional },
+	];
+	const punctuation: Pattern = {
+		match: "[-.,;:()]",
+		name: SCOPE.punctuation,
+	};
+	// Doc variants must match before their plain counterparts (#! before #, /** before /*).
+	const comments: Pattern[] = [
+		{ name: SCOPE.docCommentBlock, begin: "/\\*\\*", end: "\\*/" },
+		{ name: SCOPE.blockComment, begin: "/\\*", end: "\\*/" },
+		{ name: SCOPE.docCommentLine, match: "#!.*$" },
+		{ name: SCOPE.lineComment, match: "#.*$" },
+	];
+	// Dutch patient-prose frequency, built from the tree-sitter-recipe
+	// `grammar/dutch` vocab. The whole phrase paints as frequency, mirroring
+	// the upstream highlights — `(frequency (number) @keyword.repeat)`,
+	// `(count_word) @keyword.repeat`, `(period) @keyword.repeat` — so the
+	// leading count (digit or spelled) is part of the frequency, not a dose.
+	const period = alt(PERIODS);
+	const periodNoun = alt([...PERIOD_PLURALS, ...PERIODS]);
+	const dutchFrequency: Pattern[] = [
+		// interval: "om de [andere] [N] uur|dag|dagen|…"
+		{
+			match: `(?i)\\bom[ \\t]+de(?:[ \\t]+andere)?(?:[ \\t]+\\d+)?[ \\t]+(?:${periodNoun})\\b`,
+			name: SCOPE.frequency,
+		},
+		// digit cadence: "3 keer per dag", "3x daags", "2 maal per week"
+		{
+			match: `(?i)\\b\\d+[ \\t]*(?:${alt(COUNTERS)})[ \\t]+(?:per[ \\t]+(?:${period})|daags)\\b`,
+			name: SCOPE.frequency,
+		},
+		// spelled count word: "driemaal", "eenmaal per dag", "driemaal daags"
+		{
+			match: `(?i)\\b(?:${alt(NUMBER_WORDS)})[ \\t]*maal(?:[ \\t]+(?:daags|per[ \\t]+(?:${period})))?\\b`,
+			name: SCOPE.frequency,
+		},
+	];
+	// Shared atoms inside every section. Order = first-match priority.
+	const sharedAtoms: Pattern[] = [
+		...comments,
+		warningAbbrev,
+		dtdDirective,
+		fillTo,
+		compactFrequency,
+		...dutchFrequency,
+		doseMatch,
+		...latinAbbrevs,
+		bareNumber,
+		punctuation,
+	];
+	/**
+	 * Sections end only at the literal next marker (R/, Da/, D/, S/) or EOF.
+	 * The trailing slash is load-bearing: without it, `s\b` inside `s.o.s.`
+	 * would spuriously close a signa section because `.` is non-word.
+	 */
+	const nextSection = "(?i)(?=R/|Da?/|S/)|\\z";
+	const makeSection = (
+		begin: string,
+		marker: string,
+		wordScope: string,
+	): Pattern => ({
+		name: `meta.section.${wordScope.split(".")[2] ?? "unknown"}.recipe`,
+		begin,
+		beginCaptures: { "0": { name: marker } },
+		end: nextSection,
+		patterns: [
+			...sharedAtoms,
+			{ match: "[A-Za-zÀ-ÿ][A-Za-zÀ-ÿ0-9\\-]*", name: wordScope },
+		],
+	});
+	const rxSection = makeSection("(?i)R/", SCOPE.rxMarker, SCOPE.ingredientWord);
+	const dispenseSection = makeSection(
+		"(?i)Da?/",
+		SCOPE.dispenseMarker,
+		SCOPE.dispenseWord,
+	);
+	const signaSection = makeSection(
+		"(?i)S/",
+		SCOPE.signaMarker,
+		SCOPE.signaWord,
+	);
+	const grammar: Grammar = {
+		$schema: "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json",
+		name: "Recipe",
+		scopeName: "source.recipe",
+		fileTypes: ["recipe"],
+		patterns: [
+			...comments,
+			rxSection,
+			dispenseSection,
+			signaSection,
+			warningAbbrev,
+		],
+		repository: {
+			comments: { patterns: comments },
+			"shared-atoms": { patterns: sharedAtoms },
+		},
+	};
+	const stats: BuildStats = {
+		topLevelPatterns: countPatterns(grammar.patterns),
+		vocab: {
+			frequency: FREQUENCY.length,
+			timing: { single: TIMING.length, multi: TIMING_MULTIWORD.length },
+			route: { single: ROUTE.length, multi: ROUTE_MULTIWORD.length },
+			dispensing: {
+				single: DISPENSING.length,
+				multi: DISPENSING_MULTIWORD.length,
+			},
+			forms: { single: FORMS.length, multi: FORMS_MULTIWORD.length },
+			compounding: {
+				single: COMPOUNDING.length,
+				multi: COMPOUNDING_MULTIWORD.length,
+			},
+			conditional: {
+				single: CONDITIONAL.length,
+				multi: CONDITIONAL_MULTIWORD.length,
+			},
+			warning: WARNING.length,
+			units: UNITS.length,
+		},
+	};
+	return { grammar, stats };
+}
+/**
+ * Counts the patterns in `patterns`, including every pattern nested under a
+ * `patterns` property at any depth.
+ */
+function countPatterns(patterns: Pattern[]): number {
+	return patterns.reduce((total, entry) => {
+		const nested = "patterns" in entry && entry.patterns ? countPatterns(entry.patterns) : 0;
+		return total + 1 + nested;
+	}, 0);
+}
+/**
+ * Serializes a grammar to pretty-printed JSON followed by one newline.
+ *
+ * @param g Grammar object to serialize.
+ * @param indent `"tab"` for tab indentation, or the number of spaces per level.
+ * @returns The JSON text with a trailing `\n`.
+ */
+export function serializeGrammar(g: Grammar, indent: "tab" | number): string {
+	const json = JSON.stringify(g, null, indent === "tab" ? "\t" : indent);
+	return json + "\n";
+}

package/src/verifier.ts ADDED Viewed

@@ -0,0 +1,168 @@
+/**
+ * @file Pure verifier — tokenizes tree-sitter-recipe's own highlight fixtures
+ * with the generated TextMate grammar and reports whether each caret assertion
+ * lands on a matching scope.
+ *
+ * No CLI concerns here; the caller supplies paths and decides how to present
+ * the result (text table / JSON / exit code).
+ */
+import { readdirSync, readFileSync } from "node:fs";
+import { createRequire } from "node:module";
+import { resolve } from "node:path";
+import type { StateStack } from "vscode-textmate";
+const require = createRequire(import.meta.url);
+const oniguruma: typeof import("vscode-oniguruma") = require("vscode-oniguruma");
+const textmate: typeof import("vscode-textmate") = require("vscode-textmate");
+const { parseRawGrammar, Registry } = textmate;
+// ── capture → scope mapping (inverse of grammar.ts SCOPE) ───────────────────
+// Fixtures speak tree-sitter capture names; the tokenizer speaks TextMate
+// scopes. A token passes when one of its scopes starts with the expected
+// prefix below — the scope tree is hierarchical, so prefix-match is correct.
+// A capture name with no entry here has no expected prefix, so an assertion
+// using it is always reported as a failure.
+const CAPTURE_EXPECTS: Record<string, string> = {
+	"keyword.directive": "keyword.control.directive",
+	"keyword.repeat": "keyword.other.frequency",
+	"keyword.error": "invalid.illegal.warning",
+	"keyword.operator": "keyword.operator",
+	"keyword.conditional": "keyword.control.conditional",
+	"keyword": "keyword.other.timing",
+	"function.macro": "support.function.route",
+	"attribute": "entity.other.attribute-name",
+	"type": "storage.type.form",
+	"type.builtin": "support.type.unit",
+	"number": "constant.numeric",
+	"variable": "variable.other.ingredient",
+	"string": "string.unquoted.signa",
+	"comment": "comment",
+	"comment.documentation": "comment",
+	"punctuation.delimiter": "punctuation.separator",
+};
+/** One assertion that did not land on a token with the expected scope. */
+export type Failure = {
+	fixture: string; // fixture file name
+	line: number; // 1-indexed line within the comment-stripped source
+	col: number; // 0-indexed column that was asserted
+	capture: string; // tree-sitter capture name the fixture expected
+	got: string[] | null; // scopes of the token at that position, or null if no token was found
+};
+/** Aggregate outcome of a verification run across all fixtures. */
+export type VerifyResult = {
+	pass: number; // assertions that passed
+	total: number; // assertions evaluated
+	failures: Failure[];
+};
+/** Filesystem locations that `verify` reads from. */
+export type VerifyOptions = {
+	grammarPath: string; // grammar file, read as UTF-8 and handed to parseRawGrammar
+	fixturesDir: string; // directory scanned for files ending in `.recipe`
+	onigWasmPath: string; // Oniguruma WASM binary passed to oniguruma.loadWASM
+};
+// ── fixture parser ──────────────────────────────────────────────────────────
+/** A single expectation parsed from an assertion comment in a fixture. */
+type Assertion = {
+	fixture: string;
+	targetLine: number; // 1-indexed source line
+	col: number; // 0-indexed column
+	capture: string;
+};
+// An assertion line: optional indent, `#`, then either `<-` or one or more
+// `^`, then a capture name made of word characters and dots.
+const ASSERT_RE = /^\s*#\s*(<-|\^+)\s+([\w.]+)\s*$/;
+// Any line whose first non-whitespace character is `#`.
+const COMMENT_ONLY_RE = /^\s*#/;
+/**
+ * Splits a fixture into the source to tokenize and its position assertions.
+ *
+ * Every line whose first non-blank character is `#` is dropped from the
+ * source. Those that match `ASSERT_RE` become assertions against the nearest
+ * source line above them; assertion lines that appear before any source line
+ * are ignored.
+ *
+ * @param content Raw fixture text (LF or CRLF line endings).
+ * @param name Fixture name recorded on each assertion.
+ * @returns The comment-free source joined with `\n`, plus the assertions in
+ *          file order.
+ */
+function parseFixture(content: string, name: string): { source: string; asserts: Assertion[] } {
+	const sourceLines: string[] = [];
+	const asserts: Assertion[] = [];
+	for (const raw of content.split(/\r?\n/)) {
+		if (!COMMENT_ONLY_RE.test(raw)) {
+			sourceLines.push(raw);
+			continue;
+		}
+		const match = ASSERT_RE.exec(raw);
+		if (!match) continue;
+		const [, kind, capture] = match;
+		if (!kind || !capture) continue;
+		// 1-indexed number of the most recent source line; 0 means none yet.
+		const targetLine = sourceLines.length;
+		if (targetLine === 0) continue;
+		// `<-` points at the column where the comment itself starts, which is
+		// not column 0 when the assertion line is indented. `^` points at the
+		// column of the (first) caret.
+		const col = kind === "<-" ? raw.indexOf("#") : raw.indexOf("^");
+		asserts.push({ fixture: name, targetLine, col, capture });
+	}
+	return { source: sourceLines.join("\n"), asserts };
+}
+// ── main ────────────────────────────────────────────────────────────────────
+/**
+ * Tokenizes every `.recipe` fixture in `opts.fixturesDir` with the grammar at
+ * `opts.grammarPath` and checks each assertion against the scopes of the
+ * token found at the asserted position.
+ *
+ * An assertion passes when the token at its line/column has at least one
+ * scope starting with the prefix that `CAPTURE_EXPECTS` maps its capture to.
+ *
+ * @param opts Paths to the grammar file, the fixtures directory, and the
+ *             Oniguruma WASM binary.
+ * @returns Pass/total counts and one `Failure` per assertion that did not pass.
+ */
+export async function verify(opts: VerifyOptions): Promise<VerifyResult> {
+	const wasmBin = readFileSync(opts.onigWasmPath);
+	// A Buffer can be a view into a larger backing ArrayBuffer, so `.buffer`
+	// alone is not guaranteed to hold exactly the file's bytes. Copy out the
+	// Buffer's own window instead.
+	const wasmBytes = wasmBin.buffer.slice(
+		wasmBin.byteOffset,
+		wasmBin.byteOffset + wasmBin.byteLength,
+	) as ArrayBuffer;
+	await oniguruma.loadWASM(wasmBytes);
+	const onigLib = Promise.resolve({
+		createOnigScanner: (patterns: string[]) => new oniguruma.OnigScanner(patterns),
+		createOnigString: (s: string) => new oniguruma.OnigString(s),
+	});
+	const rawGrammar = parseRawGrammar(
+		readFileSync(opts.grammarPath, "utf-8"),
+		opts.grammarPath,
+	);
+	// The grammar is registered directly, so the on-demand loader never has
+	// anything to supply.
+	const registry = new Registry({ onigLib, loadGrammar: async () => null });
+	const grammar = await registry.addGrammar(rawGrammar);
+	const result: VerifyResult = { pass: 0, total: 0, failures: [] };
+	// Sorted so fixtures are processed in a stable order.
+	for (const name of readdirSync(opts.fixturesDir).sort()) {
+		if (!name.endsWith(".recipe")) continue;
+		const content = readFileSync(resolve(opts.fixturesDir, name), "utf-8");
+		const { source, asserts } = parseFixture(content, name);
+		const sourceLines = source.split("\n");
+		// Tokenizer state is threaded from line to line within one fixture and
+		// reset for the next one.
+		let ruleStack: StateStack | null = null;
+		const perLine: { start: number; end: number; scopes: string[] }[][] = [];
+		for (const line of sourceLines) {
+			const r = grammar.tokenizeLine(line, ruleStack);
+			perLine.push(r.tokens.map((t) => ({
+				start: t.startIndex,
+				end: t.endIndex,
+				scopes: [...t.scopes],
+			})));
+			ruleStack = r.ruleStack;
+		}
+		for (const a of asserts) {
+			result.total += 1;
+			const tokens = perLine[a.targetLine - 1];
+			// A caret may sit one past the final character (a token's exclusive
+			// end at end-of-line) — tree-sitter's own harness accepts that, so
+			// fall back to the token whose right boundary equals the column.
+			const hit = tokens?.find((t) => a.col >= t.start && a.col < t.end)
+				?? tokens?.find((t) => a.col === t.end);
+			// Undefined for a capture name that has no mapping; such an
+			// assertion is recorded as a failure below.
+			const expected = CAPTURE_EXPECTS[a.capture];
+			const passed = !!(hit && expected && hit.scopes.some((s) => s.startsWith(expected)));
+			if (passed) {
+				result.pass += 1;
+			} else {
+				result.failures.push({
+					fixture: a.fixture,
+					line: a.targetLine,
+					col: a.col,
+					capture: a.capture,
+					got: hit ? hit.scopes : null,
+				});
+			}
+		}
+	}
+	return result;
+}