npm - mdat-plugin-cli-help - Versions diffs - 2.0.2 → 2.1.1 - Mend

mdat-plugin-cli-help 2.0.2 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/index.js CHANGED Viewed

@@ -134,6 +134,326 @@ function setLogger(logger) {
 	log = injectionHelper(logger);
 }
 //#endregion
+//#region src/utilities/parsers/commander.ts
+const flag$2 = createToken({
+	name: "flag",
+	pattern: /--[\w-]+/
+});
+const alias$2 = createToken({
+	name: "alias",
+	pattern: /-[A-Z]/i
+});
+const comma$2 = createToken({
+	group: Lexer.SKIPPED,
+	name: "comma",
+	pattern: /,/
+});
+const word$2 = createToken({
+	name: "word",
+	pattern: /\S+/
+});
+const argument$2 = createToken({
+	name: "argument",
+	pattern: /<\S+>|\[\S+\]/
+});
+const defaultInfoParens = createToken({
+	name: "defaultInfoParens",
+	pattern: /\(default:\s.+?\)/
+});
+const whiteSpace$2 = createToken({
+	group: Lexer.SKIPPED,
+	name: "whiteSpace",
+	pattern: /\s/
+});
+const usagePrefix = createToken({
+	group: Lexer.SKIPPED,
+	name: "usagePrefix",
+	pattern: /Usage:\s/
+});
+const startProgramDescription$1 = createToken({
+	group: Lexer.SKIPPED,
+	name: "startProgramDescription",
+	pattern: /\n\n/,
+	push_mode: "PROGRAM_DESCRIPTION_MODE"
+});
+const programDescription$2 = createToken({
+	name: "programDescription",
+	pattern: /.+/
+});
+const endProgramDescription$1 = createToken({
+	group: Lexer.SKIPPED,
+	name: "endProgramDescription",
+	pattern: /\n\n/,
+	pop_mode: true
+});
+const startArgumentsSection = createToken({
+	name: "startArgumentsSection",
+	pattern: /Arguments:\n/,
+	push_mode: "SECTION_MODE"
+});
+const startOptionsSection$1 = createToken({
+	name: "startOptionsSection",
+	pattern: /Options:\n/,
+	push_mode: "SECTION_MODE"
+});
+const startCommandsSection$1 = createToken({
+	name: "startCommandsSection",
+	pattern: /Commands:\n/,
+	push_mode: "SECTION_MODE"
+});
+const startRow$2 = createToken({
+	name: "startRow",
+	pattern: / {2,}/,
+	push_mode: "ROW_MODE"
+});
+const rowDescription$2 = createToken({
+	name: "rowDescription",
+	pattern: / {2}\w.+? {2}/
+});
+const rowDescriptionTerminal$1 = createToken({
+	name: "rowDescriptionTerminal",
+	pattern: / {2}\w[^(\n]+(?=\(default:)| {2}\w.+/
+});
+const endRow$2 = createToken({
+	group: Lexer.SKIPPED,
+	name: "endRow",
+	pattern: /\n/,
+	pop_mode: true
+});
+const endSection$1 = createToken({
+	group: Lexer.SKIPPED,
+	name: "endSection",
+	pattern: /\n+/,
+	pop_mode: true
+});
+const lexer$2 = new Lexer({
+	defaultMode: "DEFAULT_MODE",
+	modes: {
+		DEFAULT_MODE: [
+			startArgumentsSection,
+			startOptionsSection$1,
+			startCommandsSection$1,
+			usagePrefix,
+			startProgramDescription$1,
+			argument$2,
+			word$2,
+			whiteSpace$2
+		],
+		PROGRAM_DESCRIPTION_MODE: [endProgramDescription$1, programDescription$2],
+		ROW_MODE: [
+			endRow$2,
+			comma$2,
+			defaultInfoParens,
+			rowDescription$2,
+			rowDescriptionTerminal$1,
+			flag$2,
+			alias$2,
+			argument$2,
+			word$2,
+			whiteSpace$2
+		],
+		SECTION_MODE: [startRow$2, endSection$1]
+	}
+});
+const allTokens$2 = [
+	flag$2,
+	alias$2,
+	comma$2,
+	word$2,
+	argument$2,
+	defaultInfoParens,
+	whiteSpace$2,
+	usagePrefix,
+	startProgramDescription$1,
+	programDescription$2,
+	endProgramDescription$1,
+	startArgumentsSection,
+	startOptionsSection$1,
+	startCommandsSection$1,
+	startRow$2,
+	rowDescription$2,
+	rowDescriptionTerminal$1,
+	endRow$2,
+	endSection$1
+];
+var CliParser$2 = class extends CstParser {
+	sectionRow = this.RULE("sectionRow", () => {
+		this.CONSUME(startRow$2);
+		this.MANY(() => {
+			this.OR([
+				{ ALT: () => this.CONSUME(argument$2) },
+				{ ALT: () => this.CONSUME(alias$2) },
+				{ ALT: () => this.CONSUME(flag$2) },
+				{ ALT: () => this.CONSUME(rowDescription$2, { LABEL: "description" }) },
+				{ ALT: () => this.CONSUME(rowDescriptionTerminal$1, { LABEL: "description" }) },
+				{ ALT: () => this.CONSUME(defaultInfoParens) },
+				{ ALT: () => this.CONSUME(word$2, { LABEL: "commandName" }) }
+			]);
+		});
+	});
+	argumentsSection = this.RULE("argumentsSection", () => {
+		this.CONSUME(startArgumentsSection);
+		this.MANY3(() => {
+			this.SUBRULE3(this.sectionRow);
+		});
+	});
+	commandsSection = this.RULE("commandsSection", () => {
+		this.CONSUME(startCommandsSection$1);
+		this.MANY1(() => {
+			this.SUBRULE1(this.sectionRow);
+		});
+	});
+	optionsSection = this.RULE("optionsSection", () => {
+		this.CONSUME(startOptionsSection$1);
+		this.MANY2(() => {
+			this.SUBRULE2(this.sectionRow);
+		});
+	});
+	programHelp = this.RULE("programHelp", () => {
+		this.AT_LEAST_ONE(() => {
+			this.CONSUME(word$2, { LABEL: "commandName" });
+		});
+		this.MANY1(() => {
+			this.CONSUME(argument$2);
+		});
+		this.OPTION(() => {
+			this.CONSUME(programDescription$2, { LABEL: "description" });
+		});
+		this.OPTION1(() => {
+			this.SUBRULE(this.argumentsSection);
+		});
+		this.OPTION2(() => {
+			this.SUBRULE(this.optionsSection);
+		});
+		this.OPTION3(() => {
+			this.SUBRULE(this.commandsSection);
+		});
+	});
+	constructor() {
+		super(allTokens$2);
+		this.performSelfAnalysis();
+	}
+};
+const parser$2 = new CliParser$2();
+var CliHelpToObjectVisitor$2 = class extends parser$2.getBaseCstVisitorConstructor() {
+	constructor() {
+		super();
+		this.validateVisitor();
+	}
+	argumentsSection(context) {
+		return context.sectionRow.map((entry) => {
+			const row = this.visit(entry);
+			return {
+				arguments: row.commandName ? [row.commandName] : void 0,
+				defaultValue: row.defaultValue,
+				description: row.description
+			};
+		});
+	}
+	commandsSection(context) {
+		return context.sectionRow.map((entry) => this.visit(entry));
+	}
+	optionsSection(context) {
+		return context.sectionRow.map((entry) => this.visit(entry));
+	}
+	programHelp(context) {
+		const { command: commandName, subcommand: subcommandName } = getCommandParts(this.getString(context.commandName));
+		return {
+			arguments: this.getArray(context.argument),
+			commandName,
+			commands: context.commandsSection ? this.visit(context.commandsSection) : void 0,
+			description: this.getString(context.description),
+			options: context.optionsSection ? this.visit(context.optionsSection) : void 0,
+			positionals: context.argumentsSection ? this.visit(context.argumentsSection) : void 0,
+			subcommandName
+		};
+	}
+	sectionRow(context) {
+		return {
+			aliases: this.getArray(context.alias),
+			arguments: this.getArray(context.argument),
+			commandName: this.getString(context.commandName),
+			defaultValue: this.cleanDefault(this.getString(context.defaultInfoParens)),
+			description: this.trimDescription(this.getString(context.description)),
+			flags: this.getArray(context.flag)
+		};
+	}
+	/**
+	* Clean a Commander default value: strip `(default: ...)` wrapper, env info, and quotes.
+	*/
+	cleanDefault(text) {
+		if (text === void 0) return void 0;
+		let cleaned = text.replaceAll(/^\(default:\s*/g, "").replaceAll(/\)$/g, "").trim();
+		cleaned = cleaned.replaceAll(/,\s*env:\s*\S+$/g, "").trim();
+		cleaned = cleaned.replaceAll(/^["']|["']$/g, "");
+		return cleaned || void 0;
+	}
+	getArray(context) {
+		if (context === void 0) return void 0;
+		return context.map((entry) => entry.image);
+	}
+	getString(context) {
+		if (context === void 0) return void 0;
+		return context.map((entry) => entry.image).join(" ");
+	}
+	/**
+	* Trim leading/trailing whitespace from description text.
+	*/
+	trimDescription(text) {
+		if (text === void 0) return void 0;
+		return text.trim() || void 0;
+	}
+};
+const visitor$2 = new CliHelpToObjectVisitor$2();
+/**
+* Converts an unstructured help string emitted by a CLI tool built with the
+* `Commander` CLI library into a structured POJO describing the
+* command.
+*/
+function helpStringToObject$3(helpString) {
+	if (!helpString.trimStart().startsWith("Usage:")) throw new Error("Not a Commander-format help string (must start with \"Usage:\")");
+	const unwrapped = unwrapContinuationLines$1(helpString);
+	const lexingResult = lexer$2.tokenize(unwrapped);
+	if (lexingResult.errors.length > 0) throw new Error(`Errors lexing CLI command: ${JSON.stringify(lexingResult.errors, void 0, 2)}`);
+	parser$2.input = lexingResult.tokens;
+	const cst = parser$2.programHelp();
+	if (parser$2.errors.length > 0) throw new Error(`Errors parsing CLI command help text: ${JSON.stringify(parser$2.errors, void 0, 2)}`);
+	let programInfo;
+	try {
+		programInfo = visitor$2.visit(cst);
+	} catch (error) {
+		if (error instanceof Error) throw new TypeError(`Errors visiting CLI command help text: ${String(error)}`);
+	}
+	if (programInfo === void 0) throw new Error("Could not parse help string");
+	if (programInfo.commands) {
+		programInfo.commands = programInfo.commands.filter((cmd) => cmd.commandName !== void 0 || cmd.description !== void 0);
+		programInfo.commands = programInfo.commands.filter((cmd) => cmd.commandName !== "help");
+		for (const cmd of programInfo.commands) if (cmd.commandName && !cmd.parentCommandName) cmd.parentCommandName = programInfo.commandName;
+		if (programInfo.commands.length === 0) programInfo.commands = void 0;
+	}
+	return programInfo;
+}
+/**
+* Join continuation lines in Commander help output before lexing.
+*
+* Commander wraps long descriptions and default values across multiple lines,
+* indenting continuation lines to align with the description start column
+* (typically 30+ spaces). The lexer's ROW_MODE exits on newline, so we must
+* unwrap these before tokenizing.
+*
+* Detection: a continuation line has 4+ leading spaces and does NOT start a
+* new row (which would be exactly 2 spaces + a non-space character).
+*/
+const continuationLinePattern = /^ {4,}/;
+const newRowPattern = /^ {2}\S/;
+function unwrapContinuationLines$1(helpString) {
+	const lines = helpString.split("\n");
+	const result = [];
+	for (const line of lines) if (result.length > 0 && line.length > 0 && continuationLinePattern.test(line) && !newRowPattern.test(line)) result[result.length - 1] += " " + line.trim();
+	else result.push(line);
+	return result.join("\n");
+}
+//#endregion
 //#region src/utilities/parsers/meow.ts
 const flag$1 = createToken({
 	name: "flag",
@@ -657,7 +977,8 @@ const visitor = new CliHelpToObjectVisitor();
 * command.
 */
 function helpStringToObject$1(helpString) {
-	const lexingResult = lexer.tokenize(helpString);
+	const unwrapped = unwrapContinuationLines(helpString);
+	const lexingResult = lexer.tokenize(unwrapped);
 	if (lexingResult.errors.length > 0) throw new Error(`Errors lexing CLI command: ${JSON.stringify(lexingResult.errors, void 0, 2)}`);
 	parser.input = lexingResult.tokens;
 	const cst = parser.programHelp();
@@ -671,9 +992,40 @@ function helpStringToObject$1(helpString) {
 	if (programInfo === void 0) throw new Error("Could not parse help string");
 	return programInfo;
 }
+/**
+* Join continuation lines in Yargs help output before lexing.
+*
+* When terminal width is narrow, Yargs wraps long descriptions across multiple
+* lines, indenting continuations to align with the description start column.
+* The lexer's ROW_MODE exits on newline, so we must unwrap these first.
+*
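+* Detection: lines are only joined inside an Options/Commands/Positionals section
+* (from its header line up to the next blank line). There, a continuation line has 4+ leading spaces and its first non-space character is NOT `-` (which would indicate a new option/alias row). Yargs
+* uses variable indentation for option rows (2 spaces for aliased options like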
+* `-r, --rules`, 6 spaces for long-only options like `--config`), so we cannot
+* rely on indent depth alone to distinguish new rows from continuations.
+*/
+const deepIndentPattern = /^ {4,}/;
+const newOptionRowPattern = /^ *-/;
+const sectionHeaderPattern = /^(?:Options|Commands|Positionals):?\s*$/;
+function unwrapContinuationLines(helpString) {
+	const lines = helpString.split("\n");
+	const result = [];
+	let inSection = false;
+	for (const line of lines) if (sectionHeaderPattern.test(line)) {
+		inSection = true;
+		result.push(line);
+	} else if (line.trim() === "") {
+		inSection = false;
+		result.push(line);
+	} else if (inSection && result.length > 0 && line.length > 0 && deepIndentPattern.test(line) && !newOptionRowPattern.test(line)) result[result.length - 1] += " " + line.trim();
+	else result.push(line);
+	return result.join("\n");
+}
 //#endregion
 //#region src/utilities/parsers/index.ts
 var parsers_default = {
+	commander: helpStringToObject$3,
 	yargs: helpStringToObject$1,
 	meow: helpStringToObject$2
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mdat-plugin-cli-help",
-  "version": "2.0.2",
+  "version": "2.1.1",
   "description": "Mdat plugin to generate tabular help documentation for CLI tools in Markdown files.",
   "keywords": [
     "markdown",
@@ -38,20 +38,23 @@
     "@types/which": "^3.0.4",
     "chevrotain": "^12.0.0",
     "execa": "^9.6.1",
-    "lognow": "^0.6.0",
+    "lognow": "^0.6.1",
     "type-fest": "^5.5.0",
     "which": "^6.0.1",
     "zod": "^4.3.6"
   },
   "devDependencies": {
-    "@kitschpatrol/shared-config": "^7.0.0",
+    "@kitschpatrol/shared-config": "^7.1.0",
     "@types/node": "~22.17.2",
+    "@types/yargs": "^17.0.35",
     "bumpp": "^11.0.1",
-    "mdat": "^2.0.0",
+    "commander": "^14.0.3",
+    "mdat": "^2.2.0",
     "meow": "^14.1.0",
     "tsdown": "^0.21.7",
     "typescript": "~5.9.3",
-    "vitest": "^4.1.2"
+    "vitest": "^4.1.2",
+    "yargs": "^18.0.0"
   },
   "peerDependencies": {
     "mdat": "^2.0.0"

package/readme.md CHANGED Viewed

@@ -26,7 +26,7 @@ This plugin automatically transforms a CLI command's `--help` output into nicely
 The rule also recursively calls `--help` on any subcommands found for inclusion in the output.
-Currently, the rule can only parse help output in the format provided by [Yargs](https://yargs.js.org)- and [Meow](https://github.com/sindresorhus/meow)-based tools. If parsing fails, the rule will fall back to show the raw help output in a regular code block instead.
+Currently, the rule can parse help output in the format provided by [Commander](https://github.com/tj/commander.js)-, [Yargs](https://yargs.js.org)-, and [Meow](https://github.com/sindresorhus/meow)-based tools. If parsing fails, the rule will fall back to show the raw help output in a regular code block instead.
 ## Getting started
@@ -119,9 +119,39 @@ If you embed the rule without any arguments, it will look for the binary file li
 <!-- cli-help -->
 ```
+### Supported CLI frameworks
+#### [Yargs](https://yargs.js.org)
+Fully supported, including options, commands, positionals, choices, defaults, and type annotations.
+The parser handles line-wrapped output by unwrapping continuation lines before parsing. However, when Yargs wraps command _arguments_ onto new lines at very narrow terminal widths (e.g. below \~70 columns), those wrapped argument lines are indistinguishable from new command rows and cannot be reliably unwrapped. In practice, this is rare.
+For the most reliable parsing if you control the upstream project, configure your Yargs CLI to disable wrapping:
+```ts
+yargs(process.argv).wrap(process.stdout.isTTY ? Math.min(120, yargs.terminalWidth()) : 0)
+```
+This outputs unwrapped help text when piped, while preserving normal wrapping for interactive use.
+#### [Commander](https://github.com/tj/commander.js)
+Fully supported, including options, commands, arguments (positionals), and parenthesized defaults with optional environment variable annotations (e.g. `(default: "value", env: MY_VAR)`).
+The parser handles line-wrapped output by unwrapping continuation lines before parsing. Commander's built-in `help` command is automatically filtered from subcommand recursion to avoid duplicate output.
+#### [Meow](https://github.com/sindresorhus/meow)
+Should be fully supported or nearly so.
 ## Development notes
-Parsing arbitrary `--help` output is a bit tricky. The [jc](https://github.com/kellyjonbrazil/jc) project is a heroic collection of output parsers, but does not currently implement help output parsing. It might be interesting to try to contribute mdat's help parsing implementations to jc.
+Parsing arbitrary `--help` output is a bit tricky.
+You're right to think that an LLM could make quick work of this kind of "fuzzy text to structured data" transcription. However, when this tool was originally developed in 2024, testing a language model approach yielded sub-par results, so I pursued a traditional lexer/parser approach instead. There is also the logistical overhead of providing a smart-enough model both locally and in CI, where this tool frequently runs; it's technically feasible, but unpleasant. While the current hand-tuned parsers are admittedly a brittle tangle, future versions may revisit the LLM approach.
+In terms of prior art, the [jc](https://github.com/kellyjonbrazil/jc) project stands out as a heroic collection of CLI-tool output parsers, but does not currently implement help output parsing. It might be interesting to try to contribute mdat's help parsing implementations to jc.
 Currently, the parser implementation lives in this repository because I really only use it in the context of my CLI tool readme files. In theory, it really belongs in a separate package.