mdat-plugin-cli-help 2.1.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/dist/index.js +85 -5
  2. package/package.json +3 -3
  3. package/readme.md +31 -1
package/dist/index.js CHANGED
@@ -186,6 +186,11 @@ const endProgramDescription$1 = createToken({
186
186
  pattern: /\n\n/,
187
187
  pop_mode: true
188
188
  });
189
+ const startArgumentsSection = createToken({
190
+ name: "startArgumentsSection",
191
+ pattern: /Arguments:\n/,
192
+ push_mode: "SECTION_MODE"
193
+ });
189
194
  const startOptionsSection$1 = createToken({
190
195
  name: "startOptionsSection",
191
196
  pattern: /Options:\n/,
@@ -225,6 +230,7 @@ const lexer$2 = new Lexer({
225
230
  defaultMode: "DEFAULT_MODE",
226
231
  modes: {
227
232
  DEFAULT_MODE: [
233
+ startArgumentsSection,
228
234
  startOptionsSection$1,
229
235
  startCommandsSection$1,
230
236
  usagePrefix,
@@ -261,6 +267,7 @@ const allTokens$2 = [
261
267
  startProgramDescription$1,
262
268
  programDescription$2,
263
269
  endProgramDescription$1,
270
+ startArgumentsSection,
264
271
  startOptionsSection$1,
265
272
  startCommandsSection$1,
266
273
  startRow$2,
@@ -284,6 +291,12 @@ var CliParser$2 = class extends CstParser {
284
291
  ]);
285
292
  });
286
293
  });
294
+ argumentsSection = this.RULE("argumentsSection", () => {
295
+ this.CONSUME(startArgumentsSection);
296
+ this.MANY3(() => {
297
+ this.SUBRULE3(this.sectionRow);
298
+ });
299
+ });
287
300
  commandsSection = this.RULE("commandsSection", () => {
288
301
  this.CONSUME(startCommandsSection$1);
289
302
  this.MANY1(() => {
@@ -307,9 +320,12 @@ var CliParser$2 = class extends CstParser {
307
320
  this.CONSUME(programDescription$2, { LABEL: "description" });
308
321
  });
309
322
  this.OPTION1(() => {
310
- this.SUBRULE(this.optionsSection);
323
+ this.SUBRULE(this.argumentsSection);
311
324
  });
312
325
  this.OPTION2(() => {
326
+ this.SUBRULE(this.optionsSection);
327
+ });
328
+ this.OPTION3(() => {
313
329
  this.SUBRULE(this.commandsSection);
314
330
  });
315
331
  });
@@ -324,6 +340,16 @@ var CliHelpToObjectVisitor$2 = class extends parser$2.getBaseCstVisitorConstruct
324
340
  super();
325
341
  this.validateVisitor();
326
342
  }
343
+ argumentsSection(context) {
344
+ return context.sectionRow.map((entry) => {
345
+ const row = this.visit(entry);
346
+ return {
347
+ arguments: row.commandName ? [row.commandName] : void 0,
348
+ defaultValue: row.defaultValue,
349
+ description: row.description
350
+ };
351
+ });
352
+ }
327
353
  commandsSection(context) {
328
354
  return context.sectionRow.map((entry) => this.visit(entry));
329
355
  }
@@ -338,6 +364,7 @@ var CliHelpToObjectVisitor$2 = class extends parser$2.getBaseCstVisitorConstruct
338
364
  commands: context.commandsSection ? this.visit(context.commandsSection) : void 0,
339
365
  description: this.getString(context.description),
340
366
  options: context.optionsSection ? this.visit(context.optionsSection) : void 0,
367
+ positionals: context.argumentsSection ? this.visit(context.argumentsSection) : void 0,
341
368
  subcommandName
342
369
  };
343
370
  }
@@ -385,7 +412,8 @@ const visitor$2 = new CliHelpToObjectVisitor$2();
385
412
  */
386
413
  function helpStringToObject$3(helpString) {
387
414
  if (!helpString.trimStart().startsWith("Usage:")) throw new Error("Not a Commander-format help string (must start with \"Usage:\")");
388
- const lexingResult = lexer$2.tokenize(helpString);
415
+ const unwrapped = unwrapContinuationLines$1(helpString);
416
+ const lexingResult = lexer$2.tokenize(unwrapped);
389
417
  if (lexingResult.errors.length > 0) throw new Error(`Errors lexing CLI command: ${JSON.stringify(lexingResult.errors, void 0, 2)}`);
390
418
  parser$2.input = lexingResult.tokens;
391
419
  const cst = parser$2.programHelp();
@@ -399,11 +427,32 @@ function helpStringToObject$3(helpString) {
399
427
  if (programInfo === void 0) throw new Error("Could not parse help string");
400
428
  if (programInfo.commands) {
401
429
  programInfo.commands = programInfo.commands.filter((cmd) => cmd.commandName !== void 0 || cmd.description !== void 0);
430
+ programInfo.commands = programInfo.commands.filter((cmd) => cmd.commandName !== "help");
402
431
  for (const cmd of programInfo.commands) if (cmd.commandName && !cmd.parentCommandName) cmd.parentCommandName = programInfo.commandName;
403
432
  if (programInfo.commands.length === 0) programInfo.commands = void 0;
404
433
  }
405
434
  return programInfo;
406
435
  }
436
/**
 * Join continuation lines in Commander help output before lexing.
 *
 * Commander wraps long descriptions and default values across multiple lines,
 * indenting continuation lines to align with the description start column
 * (typically 30+ spaces). The lexer's ROW_MODE exits on newline, so wrapped
 * rows must be unwrapped before tokenizing.
 *
 * Detection: a continuation line has 4+ leading spaces, carries non-blank
 * content, does NOT start a new row (which would be exactly 2 spaces + a
 * non-space character), and directly follows a non-blank line.
 *
 * Blank and whitespace-only lines are always passed through untouched, and
 * nothing is ever appended to a blank line, so paragraph breaks ("\n\n")
 * in the input survive unwrapping.
 *
 * @param {string} helpString - Raw help text.
 * @returns {string} Help text with each wrapped row collapsed onto one line,
 *   continuation fragments separated by a single space.
 */
const continuationLinePattern = /^ {4,}/;
const newRowPattern = /^ {2}\S/;
function unwrapContinuationLines$1(helpString) {
  const result = [];
  for (const line of helpString.split("\n")) {
    const previous = result.length > 0 ? result[result.length - 1] : void 0;
    const continuesRow =
      previous !== void 0 &&
      // Never glue text onto a blank separator line: that would erase the
      // empty line between paragraphs or sections.
      previous.trim() !== "" &&
      // A whitespace-only line carries no content to append; merging it
      // would only add a dangling space and drop the line.
      line.trim() !== "" &&
      continuationLinePattern.test(line) &&
      !newRowPattern.test(line);
    if (continuesRow) result[result.length - 1] = `${previous} ${line.trim()}`;
    else result.push(line);
  }
  return result.join("\n");
}
407
456
  //#endregion
408
457
  //#region src/utilities/parsers/meow.ts
409
458
  const flag$1 = createToken({
@@ -928,7 +977,8 @@ const visitor = new CliHelpToObjectVisitor();
928
977
  * command.
929
978
  */
930
979
  function helpStringToObject$1(helpString) {
931
- const lexingResult = lexer.tokenize(helpString);
980
+ const unwrapped = unwrapContinuationLines(helpString);
981
+ const lexingResult = lexer.tokenize(unwrapped);
932
982
  if (lexingResult.errors.length > 0) throw new Error(`Errors lexing CLI command: ${JSON.stringify(lexingResult.errors, void 0, 2)}`);
933
983
  parser.input = lexingResult.tokens;
934
984
  const cst = parser.programHelp();
@@ -942,6 +992,36 @@ function helpStringToObject$1(helpString) {
942
992
  if (programInfo === void 0) throw new Error("Could not parse help string");
943
993
  return programInfo;
944
994
  }
995
/**
 * Join continuation lines in Yargs help output before lexing.
 *
 * When terminal width is narrow, Yargs wraps long descriptions across multiple
 * lines, indenting continuations to align with the description start column.
 * The lexer's ROW_MODE exits on newline, so wrapped rows must be unwrapped
 * first.
 *
 * Detection: a continuation line has 4+ leading spaces and its first non-space
 * character is NOT `-` (which would indicate a new option/alias row). Yargs
 * uses variable indentation for option rows (2 spaces for aliased options like
 * `-r, --rules`, 6 spaces for long-only options like `--config`), so indent
 * depth alone cannot distinguish new rows from continuations.
 *
 * Unwrapping only happens inside an Options / Commands / Positionals section,
 * which ends at the next blank line. A continuation is only ever appended to
 * a row of that section, never to the section header itself, so the header
 * line is always emitted unchanged.
 *
 * @param {string} helpString - Raw help text.
 * @returns {string} Help text with each wrapped section row collapsed onto
 *   one line, continuation fragments separated by a single space.
 */
const deepIndentPattern = /^ {4,}/;
const newOptionRowPattern = /^ *-/;
const sectionHeaderPattern = /^(?:Options|Commands|Positionals):?\s*$/;
function unwrapContinuationLines(helpString) {
  const result = [];
  let inSection = false;
  // True once the current section has emitted at least one row, i.e. the last
  // entry of `result` is a row that a continuation may legitimately extend.
  let rowStarted = false;
  for (const line of helpString.split("\n")) {
    if (sectionHeaderPattern.test(line)) {
      inSection = true;
      rowStarted = false;
      result.push(line);
      continue;
    }
    if (line.trim() === "") {
      // A blank (or whitespace-only) line terminates the current section.
      inSection = false;
      rowStarted = false;
      result.push(line);
      continue;
    }
    const continuesRow =
      inSection &&
      rowStarted &&
      deepIndentPattern.test(line) &&
      !newOptionRowPattern.test(line);
    if (continuesRow) {
      result[result.length - 1] += ` ${line.trim()}`;
    } else {
      result.push(line);
      rowStarted = inSection;
    }
  }
  return result.join("\n");
}
945
1025
  //#endregion
946
1026
  //#region src/utilities/parsers/index.ts
947
1027
  var parsers_default = {
@@ -986,7 +1066,7 @@ function helpStringToObject(helpString) {
986
1066
  continue;
987
1067
  }
988
1068
  }
989
- log.warn("Could not parse help string with any parser");
1069
+ log.debug("Could not parse help string with any parser");
990
1070
  }
991
1071
  //#endregion
992
1072
  //#region src/utilities/get-help-markdown.ts
@@ -1004,7 +1084,7 @@ async function getHelpMarkdownInternal(executable, subcommands, helpFlag, depth)
1004
1084
  const rawHelpString = await getHelpString(executable, [...subcommands, helpFlag]);
1005
1085
  const programInfo = helpStringToObject(rawHelpString);
1006
1086
  if (programInfo === void 0) {
1007
- log.warn(`Falling back to basic cli help text output.`);
1087
+ log.debug(`Falling back to basic cli help text output.`);
1008
1088
  return renderHelpMarkdownBasic(rawHelpString);
1009
1089
  }
1010
1090
  return renderHelpMarkdownObject(executable, subcommands, helpFlag, depth, programInfo);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdat-plugin-cli-help",
3
- "version": "2.1.0",
3
+ "version": "2.1.2",
4
4
  "description": "Mdat plugin to generate tabular help documentation for CLI tools in Markdown files.",
5
5
  "keywords": [
6
6
  "markdown",
@@ -49,11 +49,11 @@
49
49
  "@types/yargs": "^17.0.35",
50
50
  "bumpp": "^11.0.1",
51
51
  "commander": "^14.0.3",
52
- "mdat": "^2.2.0",
52
+ "mdat": "^2.2.1",
53
53
  "meow": "^14.1.0",
54
54
  "tsdown": "^0.21.7",
55
55
  "typescript": "~5.9.3",
56
- "vitest": "^4.1.2",
56
+ "vitest": "^4.1.3",
57
57
  "yargs": "^18.0.0"
58
58
  },
59
59
  "peerDependencies": {
package/readme.md CHANGED
@@ -119,9 +119,39 @@ If you embed the rule without any arguments, it will look for the binary file li
119
119
  <!-- cli-help -->
120
120
  ```
121
121
 
122
+ ### Supported CLI frameworks
123
+
124
+ #### [Yargs](https://yargs.js.org)
125
+
126
+ Fully supported, including options, commands, positionals, choices, defaults, and type annotations.
127
+
128
+ The parser handles line-wrapped output by unwrapping continuation lines before parsing. However, when Yargs wraps command _arguments_ onto new lines at very narrow terminal widths (e.g. below \~70 columns), those wrapped argument lines are indistinguishable from new command rows and cannot be reliably unwrapped. In practice, this is rare.
129
+
130
+ For the most reliable parsing if you control the upstream project, configure your Yargs CLI to disable wrapping:
131
+
132
+ ```ts
133
+ yargs(process.argv).wrap(process.stdout.isTTY ? Math.min(120, yargs.terminalWidth()) : 0)
134
+ ```
135
+
136
+ This outputs unwrapped help text when piped, while preserving normal wrapping for interactive use.
137
+
138
+ #### [Commander](https://github.com/tj/commander.js)
139
+
140
+ Fully supported, including options, commands, arguments (positionals), and parenthesized defaults with optional environment variable annotations (e.g. `(default: "value", env: MY_VAR)`).
141
+
142
+ The parser handles line-wrapped output by unwrapping continuation lines before parsing. Commander's built-in `help` command is automatically filtered from subcommand recursion to avoid duplicate output.
143
+
144
+ #### [Meow](https://github.com/sindresorhus/meow)
145
+
146
+ Should be fully supported or nearly so.
147
+
122
148
  ## Development notes
123
149
 
124
- Parsing arbitrary `--help` output is a bit tricky. The [jc](https://github.com/kellyjonbrazil/jc) project is a heroic collection of output parsers, but does not currently implement help output parsing. It might be interesting to try to contribute mdat's help parsing implementations to jc.
150
+ Parsing arbitrary `--help` output is a bit tricky.
151
+
152
+ You're right to think that an LLM could make quick work of this kind of "fuzzy text to structured data" transcription. However, when this tool was originally developed in 2024, testing a language model approach yielded sub-par results, so I pursued a traditional lexer/parser approach instead. There is also the logistical overhead of providing a smart-enough model both locally and in CI, where this tool frequently runs; it's technically feasible, but unpleasant. While the current hand-tuned parsers are admittedly a brittle tangle, future versions may revisit the LLM approach.
153
+
154
+ In terms of prior art, the [jc](https://github.com/kellyjonbrazil/jc) project stands out as a heroic collection of CLI-tool output parsers, but does not currently implement help output parsing. It might be interesting to try to contribute mdat's help parsing implementations to jc.
125
155
 
126
156
  Currently, the parser implementation lives in this repository because I really only use it in the context of my CLI tool readme files. In theory, it really belongs in a separate package.
127
157