@gmb/bitmark-parser-generator 5.15.0 → 5.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/main.js CHANGED
@@ -10876,7 +10876,7 @@ var instance2 = new Config();
10876
10876
  // src/generated/package_info.ts
10877
10877
  var PACKAGE_INFO = {
10878
10878
  "name": "@gmb/bitmark-parser-generator",
10879
- "version": "5.15.0",
10879
+ "version": "5.16.0",
10880
10880
  "license": "ISC"};
10881
10881
  var Environment = {
10882
10882
  unknown: "",
@@ -39294,6 +39294,265 @@ var JsonFileGenerator = class {
39294
39294
  this.generator.generateSync(_ast);
39295
39295
  }
39296
39296
  };
39297
+
39298
+ // src/generator/plainText/PlainTextGenerator.ts
39299
+ var TEXT_NODE_TYPES = new Set(Object.values(TextNodeType));
39300
var PlainTextGenerator = class {
  /**
   * Generate plain text from a string or JSON object.
   *
   * A string input is first tried as JSON. If it does not parse, or if it
   * parses to a JSON primitive that is not a string (number, boolean, null),
   * the original string is returned unchanged - such input is plain text,
   * not a structure to walk. Everything else is walked and the collected
   * text is trimmed.
   *
   * @param input - A string (plain or JSON-encoded) or a parsed JSON value.
   * @returns The extracted plain text.
   */
  generate(input) {
    let data = input;
    if (typeof data === "string") {
      let parsed;
      try {
        parsed = JSON.parse(data);
      } catch (_e) {
        // Not JSON: the input already is plain text.
        return data;
      }
      const isStructure = parsed !== null && typeof parsed === "object";
      if (!isStructure && typeof parsed !== "string") {
        // e.g. "42", "true", "null": walking the parsed primitive would
        // yield "" and silently drop the caller's text.
        return data;
      }
      data = parsed;
    }
    return this.walk(data).trim();
  }

  // ---------------------------------------------------------------------------
  // Private helpers
  // ---------------------------------------------------------------------------

  /**
   * Recursively collect text from an arbitrary JSON value.
   *
   * Strings are returned as-is, other primitives yield "". Arrays whose first
   * element is a text node are rendered as a text AST; other arrays and plain
   * objects are walked and their non-empty results joined with newlines.
   *
   * @param value - Any JSON value.
   * @returns The collected text (not trimmed).
   */
  walk(value) {
    if (value == null) return "";
    if (typeof value === "string") return value;
    if (typeof value !== "object") return "";
    if (Array.isArray(value)) {
      if (value.length === 0) return "";
      if (this.isTextAst(value)) {
        return this.textAstToPlainText(value);
      }
      return value.map((v) => this.walk(v)).filter(Boolean).join("\n");
    }
    const obj = value;
    if (this.isTextNode(obj)) {
      return this.textNodeToPlainText(obj);
    }
    if (this.isBitWrapper(obj)) {
      return this.walk(obj["bit"]);
    }
    const parts = [];
    for (const val of Object.values(obj)) {
      // Only nested objects/arrays are followed; primitive property values
      // (including strings) of a plain object are skipped.
      if (val == null || typeof val !== "object") continue;
      const text = this.walk(val);
      if (text) parts.push(text);
    }
    return parts.join("\n");
  }

  // -- Type guards -----------------------------------------------------------

  /** True when `obj.type` is a string contained in TEXT_NODE_TYPES. */
  isTextNode(obj) {
    return typeof obj["type"] === "string" && TEXT_NODE_TYPES.has(obj["type"]);
  }

  /** True when the first element of `arr` is a text node (only the first element is inspected). */
  isTextAst(arr) {
    const first = arr[0];
    return typeof first === "object" && first !== null && !Array.isArray(first) && this.isTextNode(first);
  }

  /** True when `obj` has a `bit` property holding a non-null, non-array object. */
  isBitWrapper(obj) {
    return "bit" in obj && typeof obj["bit"] === "object" && obj["bit"] !== null && !Array.isArray(obj["bit"]);
  }

  // -- TextNode extraction ---------------------------------------------------

  /** Render every top-level node of a text AST and join the results with newlines. */
  textAstToPlainText(ast) {
    return ast.map((node) => this.textNodeToPlainText(node)).join("\n");
  }

  /**
   * Render a node's children back to back (no separator), falling back to the
   * node's own `text` (or "") when it has no `content`.
   */
  childrenToPlainText(node) {
    const { text, content } = node;
    return content ? content.map((c) => this.textNodeToPlainText(c)).join("") : text ?? "";
  }

  /**
   * Render a single text node to plain text.
   *
   * @param node - A text node; null/undefined entries render as "".
   * @returns The node's plain text.
   */
  textNodeToPlainText(node) {
    // A null entry in a content array would otherwise throw on property access.
    if (node == null) return "";
    switch (node.type) {
      case TextNodeType.text:
        return this.textWithMarks(node);
      case TextNodeType.hardBreak:
        return "\n";
      // Task item - normally rendered by taskListToPlainText; this is the
      // fallback for a task item encountered on its own.
      case TextNodeType.taskItem: {
        const checked = node.attrs?.checked ?? false;
        const prefix = checked ? "[x] " : "[ ] ";
        return `${prefix}${this.childrenToPlainText(node)}`;
      }
      // List containers - rendered with the indent-aware helper
      case TextNodeType.noBulletList:
      case TextNodeType.bulletList:
      case TextNodeType.orderedList:
      case TextNodeType.orderedListRoman:
      case TextNodeType.orderedListRomanLower:
      case TextNodeType.letteredList:
      case TextNodeType.letteredListLower:
        return this.listToPlainText(node, 0);
      // Task list - rendered with the indent-aware helper
      case TextNodeType.taskList:
        return this.taskListToPlainText(node, 0);
      // Images - return alt text when available
      case TextNodeType.image:
      case TextNodeType.imageInline:
        return node.attrs?.alt ?? "";
      // LaTeX - return the formula source
      case TextNodeType.latex:
        return node.attrs?.formula ?? "";
      // Block elements, list items and any unrecognised type: children are
      // joined without an extra separator.
      case TextNodeType.paragraph:
      case TextNodeType.heading:
      case TextNodeType.section:
      case TextNodeType.gap:
      case TextNodeType.select:
      case TextNodeType.highlight:
      case TextNodeType.mark:
      case TextNodeType.codeBlock:
      case TextNodeType.listItem:
      default:
        return this.childrenToPlainText(node);
    }
  }

  /**
   * Render a (non-task) list, one line per item, each line prefixed with the
   * indent for `depth` and the marker for the list type.
   *
   * @param node - The list node.
   * @param depth - Nesting depth; 0 for a top-level list.
   * @returns The rendered list, or "" when the list has no content.
   */
  listToPlainText(node, depth) {
    const { type, content } = node;
    if (!content || content.length === 0) return "";
    const indent = " ".repeat(depth);
    const start = node.attrs?.start ?? 1;
    // NOTE(review): a start below 1 is shifted up by one - presumably to
    // accommodate a zero-based start attribute; confirm against the producer.
    const displayStart = start < 1 ? start + 1 : start;
    return content.map((child, i) => {
      const { inline, nested } = this.splitListItemContent(child, depth);
      const prefix = this.listItemPrefix(type, displayStart + i);
      const line = `${indent}${prefix}${inline}`;
      return nested ? `${line}\n${nested}` : line;
    }).join("\n");
  }

  /**
   * Render a task list, one line per item, each prefixed with "[x] " or "[ ] "
   * according to the item's `attrs.checked`.
   *
   * @param node - The task list node.
   * @param depth - Nesting depth; 0 for a top-level list.
   * @returns The rendered list, or "" when the list has no content.
   */
  taskListToPlainText(node, depth) {
    const { content } = node;
    if (!content || content.length === 0) return "";
    const indent = " ".repeat(depth);
    return content.map((child) => {
      const checked = child?.attrs?.checked ?? false;
      const prefix = checked ? "[x] " : "[ ] ";
      const { inline, nested } = this.splitListItemContent(child, depth);
      const line = `${indent}${prefix}${inline}`;
      return nested ? `${line}\n${nested}` : line;
    }).join("\n");
  }

  /**
   * Separate a list item's children into inline text and nested lists.
   *
   * @param item - The list item node.
   * @param depth - Depth of the list that owns the item; nested lists render at depth + 1.
   * @returns `inline` (children joined without separator) and `nested` (nested lists joined with newlines).
   */
  splitListItemContent(item, depth) {
    const children = item?.content ?? [];
    const inlineParts = [];
    const nestedParts = [];
    for (const child of children) {
      if (child == null) continue;
      if (this.isListType(child.type)) {
        nestedParts.push(this.renderNestedList(child, depth + 1));
      } else {
        inlineParts.push(this.textNodeToPlainText(child));
      }
    }
    return {
      inline: inlineParts.join(""),
      nested: nestedParts.join("\n")
    };
  }

  /** True when `type` is one of the list container types (including task lists). */
  isListType(type) {
    return type === TextNodeType.bulletList || type === TextNodeType.orderedList || type === TextNodeType.orderedListRoman || type === TextNodeType.orderedListRomanLower || type === TextNodeType.letteredList || type === TextNodeType.letteredListLower || type === TextNodeType.noBulletList || type === TextNodeType.taskList;
  }

  /** Render a nested list with the helper matching its type. */
  renderNestedList(node, depth) {
    if (node.type === TextNodeType.taskList) {
      return this.taskListToPlainText(node, depth);
    }
    return this.listToPlainText(node, depth);
  }

  /**
   * Marker placed before a list item.
   *
   * @param listType - The type of the owning list.
   * @param index - The item's display number.
   * @returns The marker including its trailing space, or "" for lists without a marker.
   */
  listItemPrefix(listType, index) {
    switch (listType) {
      case TextNodeType.bulletList:
        return "\u2022 ";
      case TextNodeType.orderedList:
        return `${index}. `;
      case TextNodeType.orderedListRoman:
        return `${this.toRoman(index)}. `;
      case TextNodeType.orderedListRomanLower:
        return `${this.toRoman(index).toLowerCase()}. `;
      case TextNodeType.letteredList:
        return `${this.toLetter(index)}. `;
      case TextNodeType.letteredListLower:
        return `${this.toLetter(index).toLowerCase()}. `;
      case TextNodeType.noBulletList:
      default:
        return "";
    }
  }

  /**
   * Convert a number to upper-case Roman numerals (e.g. 1994 -> "MCMXCIV").
   * Values below 1 produce "".
   */
  toRoman(num) {
    const romanNumerals = [
      { value: 1000, numeral: "M" },
      { value: 900, numeral: "CM" },
      { value: 500, numeral: "D" },
      { value: 400, numeral: "CD" },
      { value: 100, numeral: "C" },
      { value: 90, numeral: "XC" },
      { value: 50, numeral: "L" },
      { value: 40, numeral: "XL" },
      { value: 10, numeral: "X" },
      { value: 9, numeral: "IX" },
      { value: 5, numeral: "V" },
      { value: 4, numeral: "IV" },
      { value: 1, numeral: "I" }
    ];
    let result = "";
    let remaining = num;
    for (const { value, numeral } of romanNumerals) {
      while (remaining >= value) {
        result += numeral;
        remaining -= value;
      }
    }
    return result;
  }

  /**
   * Convert a number to upper-case letters in spreadsheet-column style
   * (1 -> "A", 26 -> "Z", 27 -> "AA"). Values below 1 produce "".
   */
  toLetter(num) {
    let result = "";
    let remaining = num;
    while (remaining > 0) {
      remaining--;
      result = String.fromCharCode(65 + remaining % 26) + result;
      remaining = Math.floor(remaining / 26);
    }
    return result;
  }

  /**
   * Render a text node together with its link and footnote marks.
   *
   * Link handling:
   * - text equal to the href, or text already containing the full href: text as-is;
   * - text containing the href without its http(s):// scheme: the bare form is
   *   replaced by the full href;
   * - otherwise the href is appended after the text.
   * The rendered content of every footnote mark is appended, separated by a space.
   *
   * @param node - A text node with optional `text` and `marks`.
   * @returns The rendered text.
   */
  textWithMarks(node) {
    const { text, marks } = node;
    const parts = [];
    const linkMark = marks?.find((m) => m?.type === "link");
    const rawHref = linkMark?.attrs?.href;
    // A non-string href cannot be searched for or expanded; treat it as absent.
    const href = typeof rawHref === "string" ? rawHref : undefined;
    if (text && href && text !== href) {
      const hrefBare = href.replace(/^https?:\/\//, "");
      if (text.includes(href)) {
        // Must be tested before the bare form: the bare form is a substring
        // of the full href, so expanding it here would double the scheme.
        parts.push(text);
      } else if (text.includes(hrefBare)) {
        parts.push(text.replace(hrefBare, href));
      } else {
        parts.push(`${text} ${href}`);
      }
    } else if (text) {
      parts.push(text);
    } else if (href) {
      parts.push(href);
    }
    if (marks) {
      for (const mark of marks) {
        if (mark?.type !== "footnote") continue;
        const footnoteContent = mark.attrs?.content;
        if (!footnoteContent) continue;
        const footnoteText = footnoteContent.map((c) => this.textNodeToPlainText(c)).join("");
        if (footnoteText) parts.push(footnoteText);
      }
    }
    return parts.join(" ");
  }
};
39297
39556
  var normalizeCardKey = (cardSetKey) => stringUtils.camelToKebab(cardSetKey);
39298
39557
  var ConfigBuilder = class {
39299
39558
  build(options) {
@@ -39995,7 +40254,25 @@ var Output = {
39995
40254
  /**
39996
40255
  * Output AST as a plain JS object, or a file
39997
40256
  */
39998
- ast: "ast"
40257
+ ast: "ast",
40258
+ /**
40259
+ * Output plain text as a string, or a file
40260
+ */
40261
+ text: "text"
40262
+ };
40263
/**
 * Input formats that a caller can force instead of relying on
 * automatic detection.
 */
var InputFormat = {
  /** The input is bitmark. */
  bitmark: "bitmark",

  /** The input is bitmarkText. */
  bitmarkText: "bitmarkText",

  /** The input is plain text. */
  plainText: "plainText"
};
40000
40277
  var BitmarkParserGenerator = class {
40001
40278
  ast = new Ast();
@@ -40079,6 +40356,12 @@ var BitmarkParserGenerator = class {
40079
40356
  * - input(JSON/AST) ==> output(bitmark)
40080
40357
  * - input(bitmark) ==> output(JSON)
40081
40358
  *
40359
+ * Output type can be overridden to one of the following:
40360
+ * - bitmark: output bitmark string
40361
+ * - json: output JSON as a plain JS object, or a file
40362
+ * - ast: output AST as a plain JS object, or a file
40363
+ * - text: output plain text as a string, or a file
40364
+ *
40082
40365
  * By default, the result is returned as a string for bitmark, or a plain JS object for JSON/AST.
40083
40366
  *
40084
40367
  * The options can be used to write the output to a file and to set conversion options or override defaults.
@@ -40101,6 +40384,7 @@ var BitmarkParserGenerator = class {
40101
40384
  const outputBitmark = outputFormat === Output.bitmark;
40102
40385
  const outputJson = outputFormat === Output.json;
40103
40386
  const outputAst = outputFormat === Output.ast;
40387
+ const outputText = outputFormat === Output.text;
40104
40388
  const bitmarkParserType = BitmarkParserType.peggy;
40105
40389
  let inStr = input;
40106
40390
  const inputIsString = typeof input === "string";
@@ -40147,6 +40431,22 @@ var BitmarkParserGenerator = class {
40147
40431
  }
40148
40432
  }
40149
40433
  };
40434
// bitmark ==> plain text.
// Parses the bitmark string into the shared `ast`, generates JSON from it and
// extracts plain text from that JSON. The text is written to `opts.outputFile`
// when one is set; otherwise it becomes the result (`res`).
const bitmarkToText = (bitmarkStr) => {
  ast = this.bitmarkParser.toAst(bitmarkStr, { parserType: bitmarkParserType });
  const bitsJson = new JsonObjectGenerator(opts).generateSync(ast);
  const plainText = new PlainTextGenerator().generate(bitsJson);
  if (!opts.outputFile) {
    res = plainText;
    return;
  }
  fs3.writeFileSync(opts.outputFile, plainText, { encoding: "utf8" });
};
40150
40450
  const astToBitmark = (astJson) => {
40151
40451
  if (opts.outputFile) {
40152
40452
  const generator = new BitmarkFileGenerator(opts.outputFile, opts);
@@ -40169,6 +40469,19 @@ var BitmarkParserGenerator = class {
40169
40469
  res = this.jsonStringifyPrettify(json, jsonOptions);
40170
40470
  }
40171
40471
  };
40472
// AST ==> plain text.
// Generates JSON from the AST and extracts plain text from that JSON. The text
// is written to `opts.outputFile` when one is set; otherwise it becomes the
// result (`res`).
const astToText = (astJson) => {
  const bitsJson = new JsonObjectGenerator(opts).generateSync(astJson);
  const plainText = new PlainTextGenerator().generate(bitsJson);
  const target = opts.outputFile;
  if (target) {
    fs3.writeFileSync(target, plainText, { encoding: "utf8" });
    return;
  }
  res = plainText;
};
40172
40485
  const jsonToBitmark = (astJson) => {
40173
40486
  if (opts.outputFile) {
40174
40487
  const generator = new BitmarkFileGenerator(opts.outputFile, opts);
@@ -40181,6 +40494,19 @@ var BitmarkParserGenerator = class {
40181
40494
  const jsonToAst = (astJson) => {
40182
40495
  res = this.jsonStringifyPrettify(astJson, jsonOptions);
40183
40496
  };
40497
// JSON ==> plain text.
// Runs the given value through the JSON object generator, then extracts plain
// text from the generated JSON. The text goes to `opts.outputFile` when one is
// set; otherwise it is stored in `res`.
const jsonToText = (astJson) => {
  const generated = new JsonObjectGenerator(opts).generateSync(astJson);
  const extracted = new PlainTextGenerator().generate(generated);
  if (opts.outputFile) {
    const writeOptions = { encoding: "utf8" };
    fs3.writeFileSync(opts.outputFile, extracted, writeOptions);
  } else {
    res = extracted;
  }
};
40184
40510
  const jsonToJson = (astJson) => {
40185
40511
  astToJson(astJson);
40186
40512
  };
@@ -40189,6 +40515,8 @@ var BitmarkParserGenerator = class {
40189
40515
  bitmarkToBitmark(inStr);
40190
40516
  } else if (outputAst) {
40191
40517
  bitmarkToAst(inStr);
40518
+ } else if (outputText) {
40519
+ bitmarkToText(inStr);
40192
40520
  } else {
40193
40521
  bitmarkToJson(inStr);
40194
40522
  }
@@ -40198,6 +40526,8 @@ var BitmarkParserGenerator = class {
40198
40526
  astToAst(ast);
40199
40527
  } else if (outputJson) {
40200
40528
  astToJson(ast);
40529
+ } else if (outputText) {
40530
+ astToText(ast);
40201
40531
  } else {
40202
40532
  astToBitmark(ast);
40203
40533
  }
@@ -40207,6 +40537,8 @@ var BitmarkParserGenerator = class {
40207
40537
  jsonToJson(ast);
40208
40538
  } else if (outputAst) {
40209
40539
  jsonToAst(ast);
40540
+ } else if (outputText) {
40541
+ jsonToText(ast);
40210
40542
  } else {
40211
40543
  jsonToBitmark(ast);
40212
40544
  }
@@ -40335,7 +40667,7 @@ var BitmarkParserGenerator = class {
40335
40667
  return res;
40336
40668
  }
40337
40669
  /**
40338
- * Convert bitmark text from JSON, or JSON to bitmark text.
40670
+ * Convert bitmark text to JSON, or JSON to bitmark text.
40339
40671
  *
40340
40672
  * Input type is detected automatically and may be:
40341
40673
  * - string: bitmark text or JSON
@@ -40409,6 +40741,54 @@ var BitmarkParserGenerator = class {
40409
40741
  }
40410
40742
  return res;
40411
40743
  }
40744
+ extractPlainText(input, options) {
40745
+ const dataIn = input;
40746
+ const inputFormat = options?.inputFormat;
40747
+ const isString2 = typeof input === "string";
40748
+ let data;
40749
+ const parseAutomatically = () => {
40750
+ let dataOut = dataIn;
40751
+ if (typeof dataIn === "string") {
40752
+ try {
40753
+ dataOut = JSON.parse(dataIn);
40754
+ } catch (_e) {
40755
+ let isBitmark = false;
40756
+ const bitmarkData = this.convert(dataIn, {
40757
+ outputFormat: Output.json
40758
+ });
40759
+ if (bitmarkData.length > 0) {
40760
+ const isError = bitmarkData[0].bit.type === BitType._error;
40761
+ if (!isError) {
40762
+ isBitmark = true;
40763
+ dataOut = bitmarkData;
40764
+ }
40765
+ }
40766
+ if (!isBitmark) {
40767
+ dataOut = this.convertText(dataIn, {
40768
+ textFormat: TextFormat.bitmarkText
40769
+ });
40770
+ }
40771
+ }
40772
+ }
40773
+ return dataOut;
40774
+ };
40775
+ if (inputFormat === InputFormat.bitmark) {
40776
+ data = this.convert(dataIn, {
40777
+ outputFormat: Output.json
40778
+ });
40779
+ } else if (inputFormat === InputFormat.bitmarkText) {
40780
+ data = this.convertText(dataIn, {
40781
+ textFormat: TextFormat.bitmarkText
40782
+ });
40783
+ } else if (inputFormat === InputFormat.plainText) {
40784
+ if (isString2) data = String(input);
40785
+ } else {
40786
+ data = parseAutomatically();
40787
+ }
40788
+ const generator = new PlainTextGenerator();
40789
+ const res = generator.generate(data);
40790
+ return res;
40791
+ }
40412
40792
  /**
40413
40793
  * Breakscape bitmark text.
40414
40794
  *
@@ -40516,6 +40896,16 @@ var BitmarkParserGenerator = class {
40516
40896
  }
40517
40897
  return;
40518
40898
  }
40899
+ textAstToPlainText(textAst, _options) {
40900
+ const textGenerator = new TextGenerator(BitmarkVersion.v3, {
40901
+ //
40902
+ });
40903
+ const res = textGenerator.generateSync(textAst, TextFormat.bitmarkText, TextLocation.body, {
40904
+ noBreakscaping: true,
40905
+ noMarkup: true
40906
+ });
40907
+ return res;
40908
+ }
40519
40909
  /**
40520
40910
  * Stringify / prettify a plain JS object to a JSON string, depending on the JSON options
40521
40911
  *
@@ -40773,6 +41163,42 @@ Examples:
40773
41163
  );
40774
41164
  return cmd;
40775
41165
  }
41166
+ var INPUT_FORMAT_CHOICES = enumChoices(InputFormat);
41167
/**
 * Build the `extractPlainText` CLI command.
 *
 * The command reads its input (argument or <stdin>), extracts plain text with
 * `BitmarkParserGenerator.extractPlainText` (optionally forcing the input
 * format) and writes the result to the output file or <stdout>. Any error is
 * printed to stderr and the process exits with code 1.
 *
 * @returns The configured command.
 */
function createExtractPlainTextCommand() {
  const bpg = new BitmarkParserGenerator();

  // Action handler: read -> extract -> write.
  const run = async (input, options) => {
    try {
      const dataIn = await readInput(input);
      const inputFormat = Enum(InputFormat).fromValue(options.inputFormat);
      const result = bpg.extractPlainText(dataIn, { inputFormat });
      await writeOutput(result ?? "", options.output, options.append);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.error("Error:", message);
      process.exit(1);
    }
  };

  const cmd = new Command("extractPlainText");
  cmd.description("Extract plain text from bitmark, bitmark text, or JSON");
  cmd.argument(
    "[input]",
    "file to read, or text or json string. If not specified, input will be from <stdin>"
  );
  cmd.addOption(
    new Option(
      "-f, --inputFormat <format>",
      "force input format (auto-detected by default)"
    ).choices([...INPUT_FORMAT_CHOICES])
  );
  cmd.option("-a, --append", "append to the output file (default is to overwrite)");
  cmd.option("-o, --output <file>", "output file. If not specified, output will be to <stdout>");
  cmd.action(run);
  cmd.addHelpText(
    "after",
    `
Examples:
$ bitmark-parser extractPlainText '[{"type":"paragraph","content":[{"text":"Hello World","type":"text"}],"attrs":{}}]'

$ bitmark-parser extractPlainText input.json

$ bitmark-parser extractPlainText input.json -o output.txt

$ bitmark-parser extractPlainText -f bitmark input.bitmark`
  );
  return cmd;
}
40776
41202
  var INFO_TYPE_CHOICES = (() => {
40777
41203
  const choices = new Set(enumChoices(InfoType));
40778
41204
  const ordered = [];
@@ -40900,6 +41326,7 @@ async function asyncInit() {
40900
41326
  });
40901
41327
  program.addCommand(createConvertCommand());
40902
41328
  program.addCommand(createConvertTextCommand());
41329
+ program.addCommand(createExtractPlainTextCommand());
40903
41330
  program.addCommand(createBreakscapeCommand());
40904
41331
  program.addCommand(createUnbreakscapeCommand());
40905
41332
  program.addCommand(createInfoCommand());