mdat-plugin-cli-help 2.0.2 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/dist/index.js +353 -1
  2. package/package.json +8 -5
  3. package/readme.md +32 -2
package/dist/index.js CHANGED
@@ -134,6 +134,326 @@ function setLogger(logger) {
134
134
  log = injectionHelper(logger);
135
135
  }
136
136
  //#endregion
137
//#region src/utilities/parsers/commander.ts
// Lexer vocabulary for Commander-style `--help` output.
//
// The lexer is multi-mode: DEFAULT_MODE reads the usage line, a blank line
// switches to PROGRAM_DESCRIPTION_MODE, and each "Arguments:" / "Options:" /
// "Commands:" header switches to SECTION_MODE, whose indented lines are read
// in ROW_MODE. Tokens in the `Lexer.SKIPPED` group are matched but never reach
// the parser.

// Row-level tokens: long flags, single-letter aliases, separators and values.
const flag$2 = createToken({ name: "flag", pattern: /--[\w-]+/ });
const alias$2 = createToken({ name: "alias", pattern: /-[A-Z]/i });
const comma$2 = createToken({ group: Lexer.SKIPPED, name: "comma", pattern: /,/ });
const word$2 = createToken({ name: "word", pattern: /\S+/ });
// `<required>` or `[optional]` argument placeholders.
const argument$2 = createToken({ name: "argument", pattern: /<\S+>|\[\S+\]/ });
// The whole `(default: ...)` annotation; lazily stops at the first `)`.
const defaultInfoParens = createToken({ name: "defaultInfoParens", pattern: /\(default:\s.+?\)/ });
const whiteSpace$2 = createToken({ group: Lexer.SKIPPED, name: "whiteSpace", pattern: /\s/ });

// Usage line and program description.
const usagePrefix = createToken({ group: Lexer.SKIPPED, name: "usagePrefix", pattern: /Usage:\s/ });
const startProgramDescription$1 = createToken({
  group: Lexer.SKIPPED,
  name: "startProgramDescription",
  pattern: /\n\n/,
  push_mode: "PROGRAM_DESCRIPTION_MODE"
});
const programDescription$2 = createToken({ name: "programDescription", pattern: /.+/ });
const endProgramDescription$1 = createToken({
  group: Lexer.SKIPPED,
  name: "endProgramDescription",
  pattern: /\n\n/,
  pop_mode: true
});

// Section headers; each one enters SECTION_MODE.
const startArgumentsSection = createToken({
  name: "startArgumentsSection",
  pattern: /Arguments:\n/,
  push_mode: "SECTION_MODE"
});
const startOptionsSection$1 = createToken({
  name: "startOptionsSection",
  pattern: /Options:\n/,
  push_mode: "SECTION_MODE"
});
const startCommandsSection$1 = createToken({
  name: "startCommandsSection",
  pattern: /Commands:\n/,
  push_mode: "SECTION_MODE"
});

// Rows: an indent of two or more spaces opens a row, a newline closes it.
const startRow$2 = createToken({ name: "startRow", pattern: / {2,}/, push_mode: "ROW_MODE" });
// Description followed by another two-space gap (more columns follow).
const rowDescription$2 = createToken({ name: "rowDescription", pattern: / {2}\w.+? {2}/ });
// Description running to the end of the row, or up to a `(default:` annotation.
const rowDescriptionTerminal$1 = createToken({
  name: "rowDescriptionTerminal",
  pattern: / {2}\w[^(\n]+(?=\(default:)| {2}\w.+/
});
const endRow$2 = createToken({ group: Lexer.SKIPPED, name: "endRow", pattern: /\n/, pop_mode: true });
const endSection$1 = createToken({ group: Lexer.SKIPPED, name: "endSection", pattern: /\n+/, pop_mode: true });

// Within a mode the patterns are tried in array order, so the more specific
// tokens are listed ahead of the catch-all `word` and `whiteSpace`.
const lexer$2 = new Lexer({
  defaultMode: "DEFAULT_MODE",
  modes: {
    DEFAULT_MODE: [
      startArgumentsSection,
      startOptionsSection$1,
      startCommandsSection$1,
      usagePrefix,
      startProgramDescription$1,
      argument$2,
      word$2,
      whiteSpace$2
    ],
    PROGRAM_DESCRIPTION_MODE: [endProgramDescription$1, programDescription$2],
    ROW_MODE: [
      endRow$2,
      comma$2,
      defaultInfoParens,
      rowDescription$2,
      rowDescriptionTerminal$1,
      flag$2,
      alias$2,
      argument$2,
      word$2,
      whiteSpace$2
    ],
    SECTION_MODE: [startRow$2, endSection$1]
  }
});

// Complete token vocabulary handed to the parser.
const allTokens$2 = [
  flag$2,
  alias$2,
  comma$2,
  word$2,
  argument$2,
  defaultInfoParens,
  whiteSpace$2,
  usagePrefix,
  startProgramDescription$1,
  programDescription$2,
  endProgramDescription$1,
  startArgumentsSection,
  startOptionsSection$1,
  startCommandsSection$1,
  startRow$2,
  rowDescription$2,
  rowDescriptionTerminal$1,
  endRow$2,
  endSection$1
];
279
/**
 * CST grammar for Commander help text.
 *
 * `programHelp` is the entry rule: one or more command-name words, any number
 * of argument placeholders, an optional description, then optional Arguments,
 * Options and Commands sections (in that order). Every section is a header
 * token followed by zero or more `sectionRow`s.
 */
var CliParser$2 = class extends CstParser {
  constructor() {
    super(allTokens$2);
    this.performSelfAnalysis();
  }

  // One table row: the row-start indent followed by any mix of row tokens.
  // Both description token kinds share the "description" label, and bare words
  // are labelled "commandName".
  sectionRow = this.RULE("sectionRow", () => {
    this.CONSUME(startRow$2);
    this.MANY(() => {
      this.OR([
        { ALT: () => this.CONSUME(argument$2) },
        { ALT: () => this.CONSUME(alias$2) },
        { ALT: () => this.CONSUME(flag$2) },
        { ALT: () => this.CONSUME(rowDescription$2, { LABEL: "description" }) },
        { ALT: () => this.CONSUME(rowDescriptionTerminal$1, { LABEL: "description" }) },
        { ALT: () => this.CONSUME(defaultInfoParens) },
        { ALT: () => this.CONSUME(word$2, { LABEL: "commandName" }) }
      ]);
    });
  });

  // "Arguments:" header plus its rows.
  argumentsSection = this.RULE("argumentsSection", () => {
    this.CONSUME(startArgumentsSection);
    this.MANY3(() => {
      this.SUBRULE3(this.sectionRow);
    });
  });

  // "Commands:" header plus its rows.
  commandsSection = this.RULE("commandsSection", () => {
    this.CONSUME(startCommandsSection$1);
    this.MANY1(() => {
      this.SUBRULE1(this.sectionRow);
    });
  });

  // "Options:" header plus its rows.
  optionsSection = this.RULE("optionsSection", () => {
    this.CONSUME(startOptionsSection$1);
    this.MANY2(() => {
      this.SUBRULE2(this.sectionRow);
    });
  });

  // Entry rule for a whole help screen.
  programHelp = this.RULE("programHelp", () => {
    this.AT_LEAST_ONE(() => {
      this.CONSUME(word$2, { LABEL: "commandName" });
    });
    this.MANY1(() => {
      this.CONSUME(argument$2);
    });
    this.OPTION(() => {
      this.CONSUME(programDescription$2, { LABEL: "description" });
    });
    this.OPTION1(() => {
      this.SUBRULE(this.argumentsSection);
    });
    this.OPTION2(() => {
      this.SUBRULE(this.optionsSection);
    });
    this.OPTION3(() => {
      this.SUBRULE(this.commandsSection);
    });
  });
};

// Single shared parser instance; its `input` is reassigned for every parse.
const parser$2 = new CliParser$2();
338
/**
 * CST visitor that converts the Commander parse tree into plain objects.
 *
 * There is one visit method per grammar rule, plus small helpers that turn
 * token arrays into strings or string arrays.
 */
var CliHelpToObjectVisitor$2 = class extends parser$2.getBaseCstVisitorConstructor() {
  constructor() {
    super();
    this.validateVisitor();
  }

  /**
   * Rows of the "Arguments:" section. A row's bare word (labelled
   * `commandName` by the grammar) is reported as the positional's single
   * argument name.
   */
  argumentsSection(context) {
    return context.sectionRow.map((entry) => {
      const { commandName, defaultValue, description } = this.visit(entry);
      return {
        arguments: commandName ? [commandName] : void 0,
        defaultValue,
        description
      };
    });
  }

  /** Rows of the "Commands:" section, one object per row. */
  commandsSection(context) {
    return context.sectionRow.map((entry) => this.visit(entry));
  }

  /** Rows of the "Options:" section, one object per row. */
  optionsSection(context) {
    return context.sectionRow.map((entry) => this.visit(entry));
  }

  /** Top-level program description assembled from the usage line and sections. */
  programHelp(context) {
    const usageWords = this.getString(context.commandName);
    const { command: commandName, subcommand: subcommandName } = getCommandParts(usageWords);
    const visitIfPresent = (section) => section ? this.visit(section) : void 0;
    return {
      arguments: this.getArray(context.argument),
      commandName,
      commands: visitIfPresent(context.commandsSection),
      description: this.getString(context.description),
      options: visitIfPresent(context.optionsSection),
      positionals: visitIfPresent(context.argumentsSection),
      subcommandName
    };
  }

  /** A single table row; fields with no matching tokens are `undefined`. */
  sectionRow(context) {
    const rawDefault = this.getString(context.defaultInfoParens);
    const rawDescription = this.getString(context.description);
    return {
      aliases: this.getArray(context.alias),
      arguments: this.getArray(context.argument),
      commandName: this.getString(context.commandName),
      defaultValue: this.cleanDefault(rawDefault),
      description: this.trimDescription(rawDescription),
      flags: this.getArray(context.flag)
    };
  }

  /**
   * Reduce a `(default: ...)` annotation to the bare value: drop the wrapper,
   * a trailing `, env: NAME` note, and one leading/trailing quote character.
   * Returns `undefined` when nothing is left.
   */
  cleanDefault(text) {
    if (text === void 0) return void 0;
    const withoutWrapper = text.replaceAll(/^\(default:\s*/g, "").replaceAll(/\)$/g, "").trim();
    const withoutEnv = withoutWrapper.replaceAll(/,\s*env:\s*\S+$/g, "").trim();
    const unquoted = withoutEnv.replaceAll(/^["']|["']$/g, "");
    return unquoted || void 0;
  }

  /** Token images as an array, or `undefined` when the label is absent. */
  getArray(context) {
    return context === void 0 ? void 0 : context.map((entry) => entry.image);
  }

  /** Token images joined with single spaces, or `undefined` when absent. */
  getString(context) {
    return context === void 0 ? void 0 : context.map((entry) => entry.image).join(" ");
  }

  /** Trimmed description text; empty results collapse to `undefined`. */
  trimDescription(text) {
    return text === void 0 ? void 0 : text.trim() || void 0;
  }
};

// Single shared visitor instance.
const visitor$2 = new CliHelpToObjectVisitor$2();
408
/**
 * Converts an unstructured help string emitted by a CLI tool built with the
 * `Commander` library into a structured POJO describing the command.
 *
 * Leading whitespace is discarded before lexing, so the text that is
 * tokenized is exactly the text the "Usage:" guard inspected. Otherwise a
 * leading blank line would be lexed as the start of the program description.
 *
 * @param {string} helpString Raw `--help` output.
 * @returns {object} Program info with `commandName`, `subcommandName`,
 *   `description`, `arguments`, `positionals`, `options` and `commands`.
 * @throws {Error} When the text does not start with "Usage:", when lexing or
 *   parsing reports errors, or when the visitor yields nothing.
 * @throws {TypeError} When the visitor throws; the original error is attached
 *   as `cause`.
 */
function helpStringToObject$3(helpString) {
  const trimmed = helpString.trimStart();
  if (!trimmed.startsWith("Usage:")) throw new Error("Not a Commander-format help string (must start with \"Usage:\")");

  const lexingResult = lexer$2.tokenize(unwrapContinuationLines$1(trimmed));
  if (lexingResult.errors.length > 0) throw new Error(`Errors lexing CLI command: ${JSON.stringify(lexingResult.errors, void 0, 2)}`);

  parser$2.input = lexingResult.tokens;
  const cst = parser$2.programHelp();
  if (parser$2.errors.length > 0) throw new Error(`Errors parsing CLI command help text: ${JSON.stringify(parser$2.errors, void 0, 2)}`);

  let programInfo;
  try {
    programInfo = visitor$2.visit(cst);
  } catch (error) {
    // Non-Error throwables fall through to the generic failure below.
    if (error instanceof Error) throw new TypeError(`Errors visiting CLI command help text: ${String(error)}`, { cause: error });
  }
  if (programInfo === void 0) throw new Error("Could not parse help string");

  if (programInfo.commands) {
    // Drop empty rows and Commander's built-in `help` subcommand.
    const commands = programInfo.commands.filter((cmd) => {
      const hasContent = cmd.commandName !== void 0 || cmd.description !== void 0;
      return hasContent && cmd.commandName !== "help";
    });
    // Link each remaining subcommand back to this program unless already set.
    for (const cmd of commands) {
      if (cmd.commandName && !cmd.parentCommandName) cmd.parentCommandName = programInfo.commandName;
    }
    programInfo.commands = commands.length > 0 ? commands : void 0;
  }
  return programInfo;
}
436
/**
 * Join continuation lines in Commander help output before lexing.
 *
 * Commander wraps long descriptions and default values across multiple lines,
 * indenting continuation lines to align with the description start column.
 * The lexer's ROW_MODE exits on newline, so these must be unwrapped before
 * tokenizing.
 *
 * Detection: a continuation line has 4+ leading spaces and follows a
 * non-blank line. A deep-indented line after a blank line is left alone,
 * because gluing it onto the blank line would erase the blank-line separator
 * that the lexer uses to end a section or delimit the program description.
 *
 * A new row starts with exactly 2 spaces plus a non-space character, so it
 * can never satisfy the 4+ space test; the explicit new-row check is kept as
 * a guard in case the continuation threshold is ever lowered.
 *
 * @param {string} helpString Raw help text.
 * @returns {string} Help text with each continuation line appended to the
 *   preceding line, separated by a single space.
 */
const continuationLinePattern = /^ {4,}/;
const newRowPattern = /^ {2}\S/;
function unwrapContinuationLines$1(helpString) {
  const result = [];
  for (const line of helpString.split("\n")) {
    const previous = result[result.length - 1];
    const isContinuation =
      previous !== void 0 &&
      previous.trim() !== "" &&
      continuationLinePattern.test(line) &&
      !newRowPattern.test(line);
    if (isContinuation) result[result.length - 1] = `${previous} ${line.trim()}`;
    else result.push(line);
  }
  return result.join("\n");
}
456
+ //#endregion
137
457
  //#region src/utilities/parsers/meow.ts
138
458
  const flag$1 = createToken({
139
459
  name: "flag",
@@ -657,7 +977,8 @@ const visitor = new CliHelpToObjectVisitor();
657
977
  * command.
658
978
  */
659
979
  function helpStringToObject$1(helpString) {
660
- const lexingResult = lexer.tokenize(helpString);
980
+ const unwrapped = unwrapContinuationLines(helpString);
981
+ const lexingResult = lexer.tokenize(unwrapped);
661
982
  if (lexingResult.errors.length > 0) throw new Error(`Errors lexing CLI command: ${JSON.stringify(lexingResult.errors, void 0, 2)}`);
662
983
  parser.input = lexingResult.tokens;
663
984
  const cst = parser.programHelp();
@@ -671,9 +992,40 @@ function helpStringToObject$1(helpString) {
671
992
  if (programInfo === void 0) throw new Error("Could not parse help string");
672
993
  return programInfo;
673
994
  }
995
/**
 * Join continuation lines in Yargs help output before lexing.
 *
 * At narrow terminal widths Yargs wraps long descriptions onto extra lines
 * that are indented to the description column. The lexer's ROW_MODE ends at a
 * newline, so wrapped text has to be folded back into its row first.
 *
 * Unwrapping only happens inside a section, that is, after an "Options",
 * "Commands" or "Positionals" header and before the next blank line. Within a
 * section a line is a continuation when it has 4+ leading spaces and its
 * first non-space character is not `-`. Indent depth alone is not enough,
 * because long-only option rows such as `--config` are themselves deeply
 * indented.
 *
 * @param {string} helpString Raw help text.
 * @returns {string} Help text with continuation lines appended to the
 *   preceding line, separated by a single space.
 */
const deepIndentPattern = /^ {4,}/;
const newOptionRowPattern = /^ *-/;
const sectionHeaderPattern = /^(?:Options|Commands|Positionals):?\s*$/;
function unwrapContinuationLines(helpString) {
  const unwrapped = [];
  let insideSection = false;
  for (const current of helpString.split("\n")) {
    // Headers open a section and are always kept on their own line.
    if (sectionHeaderPattern.test(current)) {
      insideSection = true;
      unwrapped.push(current);
      continue;
    }
    // A blank line closes the current section.
    if (current.trim() === "") {
      insideSection = false;
      unwrapped.push(current);
      continue;
    }
    const continuesPreviousRow =
      insideSection &&
      unwrapped.length > 0 &&
      current.length > 0 &&
      deepIndentPattern.test(current) &&
      !newOptionRowPattern.test(current);
    if (continuesPreviousRow) unwrapped[unwrapped.length - 1] += " " + current.trim();
    else unwrapped.push(current);
  }
  return unwrapped.join("\n");
}
674
1025
  //#endregion
675
1026
  //#region src/utilities/parsers/index.ts
676
1027
  var parsers_default = {
1028
+ commander: helpStringToObject$3,
677
1029
  yargs: helpStringToObject$1,
678
1030
  meow: helpStringToObject$2
679
1031
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mdat-plugin-cli-help",
3
- "version": "2.0.2",
3
+ "version": "2.1.1",
4
4
  "description": "Mdat plugin to generate tabular help documentation for CLI tools in Markdown files.",
5
5
  "keywords": [
6
6
  "markdown",
@@ -38,20 +38,23 @@
38
38
  "@types/which": "^3.0.4",
39
39
  "chevrotain": "^12.0.0",
40
40
  "execa": "^9.6.1",
41
- "lognow": "^0.6.0",
41
+ "lognow": "^0.6.1",
42
42
  "type-fest": "^5.5.0",
43
43
  "which": "^6.0.1",
44
44
  "zod": "^4.3.6"
45
45
  },
46
46
  "devDependencies": {
47
- "@kitschpatrol/shared-config": "^7.0.0",
47
+ "@kitschpatrol/shared-config": "^7.1.0",
48
48
  "@types/node": "~22.17.2",
49
+ "@types/yargs": "^17.0.35",
49
50
  "bumpp": "^11.0.1",
50
- "mdat": "^2.0.0",
51
+ "commander": "^14.0.3",
52
+ "mdat": "^2.2.0",
51
53
  "meow": "^14.1.0",
52
54
  "tsdown": "^0.21.7",
53
55
  "typescript": "~5.9.3",
54
- "vitest": "^4.1.2"
56
+ "vitest": "^4.1.2",
57
+ "yargs": "^18.0.0"
55
58
  },
56
59
  "peerDependencies": {
57
60
  "mdat": "^2.0.0"
package/readme.md CHANGED
@@ -26,7 +26,7 @@ This plugin automatically transforms a CLI command's `--help` output into nicely
26
26
 
27
27
  The rule also recursively calls `--help` on any subcommands found for inclusion in the output.
28
28
 
29
- Currently, the rule can only parse help output in the format provided by [Yargs](https://yargs.js.org)- and [Meow](https://github.com/sindresorhus/meow)-based tools. If parsing fails, the rule will fall back to show the raw help output in a regular code block instead.
29
+ Currently, the rule can parse help output in the format provided by [Commander](https://github.com/tj/commander.js)-, [Yargs](https://yargs.js.org)-, and [Meow](https://github.com/sindresorhus/meow)-based tools. If parsing fails, the rule will fall back to show the raw help output in a regular code block instead.
30
30
 
31
31
  ## Getting started
32
32
 
@@ -119,9 +119,39 @@ If you embed the rule without any arguments, it will look for the binary file li
119
119
  <!-- cli-help -->
120
120
  ```
121
121
 
122
+ ### Supported CLI frameworks
123
+
124
+ #### [Yargs](https://yargs.js.org)
125
+
126
+ Fully supported, including options, commands, positionals, choices, defaults, and type annotations.
127
+
128
+ The parser handles line-wrapped output by unwrapping continuation lines before parsing. However, when Yargs wraps command _arguments_ onto new lines at very narrow terminal widths (e.g. below \~70 columns), those wrapped argument lines are indistinguishable from new command rows and cannot be reliably unwrapped. In practice, this is rare.
129
+
130
+ For the most reliable parsing if you control the upstream project, configure your Yargs CLI to disable wrapping:
131
+
132
+ ```ts
133
+ yargs(process.argv).wrap(process.stdout.isTTY ? Math.min(120, yargs.terminalWidth()) : 0)
134
+ ```
135
+
136
+ This outputs unwrapped help text when piped, while preserving normal wrapping for interactive use.
137
+
138
+ #### [Commander](https://github.com/tj/commander.js)
139
+
140
+ Fully supported, including options, commands, arguments (positionals), and parenthesized defaults with optional environment variable annotations (e.g. `(default: "value", env: MY_VAR)`).
141
+
142
+ The parser handles line-wrapped output by unwrapping continuation lines before parsing. Commander's built-in `help` command is automatically filtered from subcommand recursion to avoid duplicate output.
143
+
144
+ #### [Meow](https://github.com/sindresorhus/meow)
145
+
146
+ Should be fully supported or nearly so.
147
+
122
148
  ## Development notes
123
149
 
124
- Parsing arbitrary `--help` output is a bit tricky. The [jc](https://github.com/kellyjonbrazil/jc) project is a heroic collection of output parsers, but does not currently implement help output parsing. It might be interesting to try to contribute mdat's help parsing implementations to jc.
150
+ Parsing arbitrary `--help` output is a bit tricky.
151
+
152
+ You're right to think that an LLM could make quick work of this kind of "fuzzy text to structured data" transcription. However, when this tool was originally developed in 2024, testing a language model approach yielded sub-par results, so I pursued a traditional lexer/parser approach instead. There is also the logistical overhead of providing a smart-enough model both locally and in CI, where this tool frequently runs; it's technically feasible, but unpleasant. While the current hand-tuned parsers are admittedly a brittle tangle, future versions may revisit the LLM approach.
153
+
154
+ In terms of prior art, the [jc](https://github.com/kellyjonbrazil/jc) project stands out as a heroic collection of CLI-tool output parsers, but does not currently implement help output parsing. It might be interesting to try to contribute mdat's help parsing implementations to jc.
125
155
 
126
156
  Currently, the parser implementation lives in this repository because I really only use it in the context of my CLI tool readme files. In theory, it really belongs in a separate package.
127
157